/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
 *
 * This software was developed by the University of Cambridge Computer
 * Laboratory (Department of Computer Science and Technology) under Innovate
 * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
 * Prototype".
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/riscvreg.h>
#include <machine/cpu.h>
#include <machine/fpe.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>

#include "vmm_stat.h"
#include "riscv.h"

#include "vmm_aplic.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct fpreg	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

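/*
 * A guest MMIO range and the read/write callbacks used to emulate accesses
 * to it.  Regions are registered with vm_register_inst_handler() and
 * consulted by vm_handle_inst_emul().
 */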
struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	struct vm_mem	mem;			/* (i) [m+v] guest memory */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu.  We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

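/*
 * Per-vcpu setup and teardown.  vcpu_cleanup() releases the backend state;
 * on final destroy it also frees the stats buffer, the guest FPU save area
 * and the vcpu lock.
 */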
static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{

	vm_maxcpu = mp_ncpus;

	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}

	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	return (vmmops_modinit());
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

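/*
 * Initialize the backend state of a VM.  Called with create == true from
 * vm_create() and with create == false from vm_reinit(), in which case any
 * previously created vcpus are reinitialized as well.
 */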
static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= aplic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	vm_mem_init(&vm->mem);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

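/*
 * Tear down VM state.  With destroy == false (vm_reinit()) only the backend
 * and per-vcpu state is released; with destroy == true (vm_destroy()) the
 * guest memory, vmspace and vcpu array are freed as well.
 */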
static void
vm_cleanup(struct vm *vm, bool destroy)
{
	int i;

	if (destroy)
		vm_xlock_memsegs(vm);
	else
		vm_assert_memseg_xlocked(vm);

	aplic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{

	vm_cleanup(vm, true);

	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

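/*
 * Handle an instruction emulation exit: find the registered MMIO region
 * covering the faulting guest physical address and emulate the access.
 * If no region matches (or no APLIC is attached) the exit is forwarded to
 * userspace by setting *retu.
 */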
static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->aplic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	return (vm->vmspace);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

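/*
 * Swap the hardware floating point state between the host and the guest
 * around guest execution: the host state is flushed to the PCB, the guest
 * state is loaded, and FP access is then disabled so that any host use of
 * the FPU traps.
 */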
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* Flush host state to the pcb. */
	fpe_state_save(curthread);

	/* Ensure the FP state will be re-loaded when exiting the guest. */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	fpe_enable();
	fpe_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpe_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	/* Save guest FPE state. */
	fpe_enable();
	fpe_store(vcpu->guestfpu);
	fpe_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{

	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{

	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{

	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_SEPC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{

	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
{

	return (vmmops_exception(vcpu->cookie, scause));
}

int
vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr)
{

	return (aplic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{

	return (aplic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{

	return (aplic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{

	return (aplic_inject_msi(vm->cookie, msg, addr));
}

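/*
 * Handle a WFI exit: put the vcpu to sleep until an interrupt or IPI is
 * pending for it, or until the vcpu thread is asked to yield.
 */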
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{

	vcpu_lock(vcpu);

	while (1) {
		if (aplic_check_pending(vcpu->cookie))
			break;

		if (riscv_check_ipi(vcpu->cookie, false))
			break;

		if (riscv_check_interrupts_pending(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;

	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr;
	pmap_t pmap;
	int ftype, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm->vmspace);
	addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);

	dprintf("%s: %lx\n", __func__, addr);

	switch (vme->scause) {
	case SCAUSE_STORE_GUEST_PAGE_FAULT:
		ftype = VM_PROT_WRITE;
		break;
	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
		ftype = VM_PROT_EXECUTE;
		break;
	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
		ftype = VM_PROT_READ;
		break;
	default:
		panic("unknown page trap: %lu", vme->scause);
	}

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, addr, ftype))
		return (0);

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS) {
		printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
		    __func__, addr, ftype, rv);
		return (EFAULT);
	}

	return (0);
}

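/*
 * Handle a suspend exit: mark this vcpu as suspended, wait until every
 * active vcpu has done the same, then wake the other suspended vcpus and
 * return to userspace.
 */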
static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

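/*
 * Run a vcpu: enter the guest via vmmops_run() and handle the resulting
 * exit.  Exits that can be serviced in the kernel loop straight back into
 * the guest; the rest are returned to userspace.
 */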
int
vm_run(struct vcpu *vcpu)
{
	struct vm_eventinfo evinfo;
	struct vm_exit *vme;
	struct vm *vm;
	pmap_t pmap;
	int error;
	int vcpuid;
	bool retu;

	vm = vcpu->vm;

	dprintf("%s\n", __func__);

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;
		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;
		case VM_EXITCODE_ECALL:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc + vme->inst_length;
			retu = true;
			break;
		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;
		case VM_EXITCODE_BOGUS:
			vcpu->nextpc = vme->pc;
			retu = false;
			error = 0;
			break;
		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;
		default:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}