/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <vm/vm.h>

#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/apicreg.h>

#include <machine/vmm.h>
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include <machine/vmm_dev.h>
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];
	struct vlapic	*vlapic;
	int		vcpuid;
	struct savefpu	*guestfpu;	/* guest fpu state */
	void		*stats;
	struct vm_exit	exitinfo;
	enum x2apic_state x2apic_state;
	int		nmi_pending;
};

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))

#define	VM_MAX_MEMORY_SEGMENTS	2

struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;
	struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	cpuset_t	active_cpus;
};

static int vmm_initialized;

static struct vmm_ops *ops;
#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)

#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm) : NULL)
#define	VMRUN(vmi, vcpu, rip) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm)			\
	(ops != NULL ?							\
	(*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) :	\
	ENXIO)
#define	VMMMAP_GET(vmi, gpa) \
	(ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMINJECT(vmi, vcpu, type, vec, ec, ecv)	\
	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)

#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

static void
vcpu_cleanup(struct vcpu *vcpu)
{
	vlapic_cleanup(vcpu->vlapic);
	vmm_stat_free(vcpu->stats);
	fpu_save_area_free(vcpu->guestfpu);
}

static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpu_id];

	vcpu_lock_init(vcpu);
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vlapic = vlapic_init(vm, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
	vcpu->guestfpu = fpu_save_area_alloc();
	fpu_save_area_reset(vcpu->guestfpu);
	vcpu->stats = vmm_stat_alloc();
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();
	vmm_ipi_init();

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_msr_init();

	return (VMM_INIT());
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		if (ppt_num_devices() > 0)
			iommu_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = 1;
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			iommu_cleanup();
			vmm_ipi_cleanup();
			error = VMM_CLEANUP();
			/*
			 * Something bad happened - prevent new
			 * VMs from being created.
			 */
			if (error)
				vmm_initialized = 0;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};
/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

int
vm_create(const char *name, struct vm **retvm)
{
	int i;
	struct vm *vm;
	vm_paddr_t maxaddr;

	const int BSP = 0;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->cookie = VMINIT(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	maxaddr = vmm_mem_maxaddr();
	vm->iommu = iommu_create_domain(maxaddr);
	vm_activate_cpu(vm, BSP);

	*retvm = vm;
	return (0);
}

static void
vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
{
	size_t len;
	vm_paddr_t hpa;
	void *host_domain;

	host_domain = iommu_host_domain();

	len = 0;
	while (len < seg->len) {
		hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
		if (hpa == (vm_paddr_t)-1) {
			panic("vm_free_mem_seg: cannot free hpa "
			      "associated with gpa 0x%016lx", seg->gpa + len);
		}

		/*
		 * Remove the 'gpa' to 'hpa' mapping in the VM's domain
		 * and resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
		 */
		iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
		iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);

		vmm_mem_free(hpa, PAGE_SIZE);

		len += PAGE_SIZE;
	}

	/*
	 * Invalidate cached translations associated with 'vm->iommu' since
	 * we have now moved some pages from it.
	 */
	iommu_invalidate_tlb(vm->iommu);

	bzero(seg, sizeof(struct vm_memory_segment));
}

void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	for (i = 0; i < vm->num_mem_segs; i++)
		vm_free_mem_seg(vm, &vm->mem_segs[i]);

	vm->num_mem_segs = 0;

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(&vm->vcpu[i]);

	iommu_destroy_domain(vm->iommu);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
			   VM_PROT_RW, spok));
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
			   VM_PROT_NONE, spok));
}

/*
 * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise.
 */
static boolean_t
vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	if (gpa & PAGE_MASK)
		panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (FALSE);
	}

	return (TRUE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int error, available, allocated;
	struct vm_memory_segment *seg;
	vm_paddr_t g, hpa;
	void *host_domain;

	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_gpa_available(vm, g))
			available++;
		else
			allocated++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	host_domain = iommu_host_domain();

	seg = &vm->mem_segs[vm->num_mem_segs];

	error = 0;
	seg->gpa = gpa;
	seg->len = 0;
	while (seg->len < len) {
		hpa = vmm_mem_alloc(PAGE_SIZE);
		if (hpa == 0) {
			error = ENOMEM;
			break;
		}

		error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
				   VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
		if (error)
			break;

		/*
		 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'
		 * and add a mapping for 'gpa + seg->len' to 'hpa' in the
		 * VM's domain.
		 */
		iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
		iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);

		seg->len += PAGE_SIZE;
	}

	if (error) {
		vm_free_mem_seg(vm, seg);
		return (error);
	}

	/*
	 * Invalidate cached translations associated with 'host_domain' since
	 * we have now moved some pages from it.
	 */
	iommu_invalidate_tlb(host_domain);

	vm->num_mem_segs++;

	return (0);
}

vm_paddr_t
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	vm_paddr_t nextpage;

	nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
	if (len > nextpage - gpa)
		panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	return (VMMMAP_GET(vm->cookie, gpa));
}

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
		  struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			*seg = vm->mem_segs[i];
			return (0);
		}
	}
	return (-1);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid, sleepticks, t;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];
	vme = &vmrun->vm_exit;
	rip = vmrun->rip;
restart:
	critical_enter();

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);

	vcpu->hostcpu = curcpu;
	error = VMRUN(vm->cookie, vcpuid, rip);
	vcpu->hostcpu = NOCPU;

	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	/* copy the exit information */
	bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit));

	critical_exit();

	/*
	 * Oblige the guest's desire to 'hlt' by sleeping until the vcpu
	 * is ready to run.
	 */
	if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) {
		vcpu_lock(vcpu);

		/*
		 * Figure out the number of host ticks until the next apic
		 * timer interrupt in the guest.
		 */
		sleepticks = lapic_timer_tick(vm, vcpuid);

		/*
		 * If the guest local apic timer is disabled then sleep for
		 * a long time but not forever.
		 */
		if (sleepticks < 0)
			sleepticks = hz;

		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep.
		 *
		 * These interrupts could have happened any time after we
		 * returned from VMRUN() and before we grabbed the vcpu lock.
		 */
		if (!vm_nmi_pending(vm, vcpuid) &&
		    lapic_pending_intr(vm, vcpuid) < 0) {
			if (sleepticks <= 0)
				panic("invalid sleepticks %d", sleepticks);
			t = ticks;
			msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
			vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
		}

		vcpu_unlock(vcpu);

		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	return (error);
}

int
vm_inject_event(struct vm *vm, int vcpuid, int type,
		int vector, uint32_t code, int code_valid)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
		return (EINVAL);

	if (vector < 0 || vector > 255)
		return (EINVAL);

	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
}

static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->nmi_pending = 1;
	vm_interrupt_hostcpu(vm, vcpuid);
	return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->nmi_pending == 0)
		panic("vm_nmi_clear: inconsistent nmi_pending state");

	vcpu->nmi_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}

int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMSETCAP(vm->cookie, vcpu, type, val));
}

uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].guest_msrs);
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].vlapic);
}

boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, i, n;
	int b, s, f;
	char *val, *cp, *cp2;

	/*
	 * XXX
	 * The length of an environment variable is limited to 128 bytes which
	 * puts an upper limit on the number of passthru devices that may be
	 * specified using a single environment variable.
	 *
	 * Work around this by scanning multiple environment variable
	 * names instead of a single one - yuck!
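	 *
	 * For illustration only (the bus/slot/function values below are
	 * hypothetical), a long passthru list can be split across the
	 * variables scanned here, e.g. as kernel environment settings in
	 * loader.conf:
	 *
	 *   pptdevs="2/0/0 2/0/1 2/0/2"
	 *   pptdevs2="3/0/0 3/0/1"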
	 */
	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };

	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
	found = 0;
	for (i = 0; names[i] != NULL && !found; i++) {
		cp = val = getenv(names[i]);
		while (cp != NULL && *cp != '\0') {
			if ((cp2 = strchr(cp, ' ')) != NULL)
				*cp2 = '\0';

			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
			if (n == 3 && bus == b && slot == s && func == f) {
				found = 1;
				break;
			}

			if (cp2 != NULL)
				*cp2++ = ' ';

			cp = cp2;
		}
		freeenv(val);
	}
	return (found);
}

void *
vm_iommu_domain(struct vm *vm)
{

	return (vm->iommu);
}

int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
{
	int error;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);

	/*
	 * The following state transitions are allowed:
	 * IDLE -> RUNNING -> IDLE
	 * IDLE -> CANNOT_RUN -> IDLE
	 */
	if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
	    (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
		error = 0;
		vcpu->state = state;
	} else {
		error = EBUSY;
	}

	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
{
	struct vcpu *vcpu;
	enum vcpu_state state;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

void
vm_activate_cpu(struct vm *vm, int vcpuid)
{

	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
		CPU_SET(vcpuid, &vm->active_cpus);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

void *
vcpu_stats(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid].stats);
}

int
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	*state = vm->vcpu[vcpuid].x2apic_state;

	return (0);
}

int
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (state >= X2APIC_STATE_LAST)
		return (EINVAL);

	vm->vcpu[vcpuid].x2apic_state = state;

	vlapic_set_x2apic_state(vm, vcpuid, state);

	return (0);
}

void
vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (hostcpu == NOCPU) {
		/*
		 * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then
		 * the host thread must be sleeping waiting for an event to
		 * kick the vcpu out of 'hlt'.
		 *
		 * XXX this is racy because the condition exists right before
		 * and after calling VMRUN() in vm_run(). The wakeup() is
		 * benign in this case.
		 */
		if (vcpu->state == VCPU_RUNNING)
			wakeup_one(vcpu);
	} else {
		if (vcpu->state != VCPU_RUNNING)
			panic("invalid vcpu state %d", vcpu->state);
		if (hostcpu != curcpu)
			ipi_cpu(hostcpu, vmm_ipinum);
	}
	vcpu_unlock(vcpu);
}