/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>

#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/apicreg.h>

#include <machine/vmm.h>
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include <machine/vmm_dev.h>
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];
	struct vlapic	*vlapic;
	int		vcpuid;
	struct savefpu	*guestfpu;	/* guest fpu state */
	void		*stats;
	struct vm_exit	exitinfo;
	enum x2apic_state x2apic_state;
	int		nmi_pending;
};

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))

#define	VM_MAX_MEMORY_SEGMENTS	2

struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;
	struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	cpuset_t	active_cpus;
};

static struct vmm_ops *ops;
#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)

#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm) : NULL)
#define	VMRUN(vmi, vcpu, rip) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm)			\
	(ops != NULL ?							\
	(*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) :	\
	ENXIO)
#define	VMMMAP_GET(vmi, gpa) \
	(ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)

/*
 * Setting CR0.TS makes the next FPU access fault with #NM; this is used to
 * catch any host use of the FPU while it holds the guest's state.  clts()
 * clears CR0.TS so the FPU can be used directly.
 */
#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

static void
vcpu_cleanup(struct vcpu *vcpu)
{
	vlapic_cleanup(vcpu->vlapic);
	vmm_stat_free(vcpu->stats);
	fpu_save_area_free(vcpu->guestfpu);
}

static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpu_id];

	vcpu_lock_init(vcpu);
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vlapic = vlapic_init(vm, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
	vcpu->guestfpu = fpu_save_area_alloc();
	fpu_save_area_reset(vcpu->guestfpu);
	vcpu->stats = vmm_stat_alloc();
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();
	vmm_ipi_init();

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_msr_init();

	return (VMM_INIT());
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		iommu_init();
		error = vmm_init();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			iommu_cleanup();
			vmm_ipi_cleanup();
			error = VMM_CLEANUP();
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

struct vm *
vm_create(const char *name)
{
	int i;
	struct vm *vm;
	vm_paddr_t maxaddr;

	const int BSP = 0;

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (NULL);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->cookie = VMINIT(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	maxaddr = vmm_mem_maxaddr();
	vm->iommu = iommu_create_domain(maxaddr);
	vm_activate_cpu(vm, BSP);

	return (vm);
}

static void
vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
{
	size_t len;
	vm_paddr_t hpa;
	void *host_domain;

	host_domain = iommu_host_domain();

	len = 0;
	while (len < seg->len) {
		hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
		if (hpa == (vm_paddr_t)-1) {
			panic("vm_free_mem_seg: cannot free hpa "
			      "associated with gpa 0x%016lx", seg->gpa + len);
		}

		/*
		 * Remove the 'gpa' to 'hpa' mapping in the VM's domain.
		 * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
		 */
		iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
		iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);

		vmm_mem_free(hpa, PAGE_SIZE);

		len += PAGE_SIZE;
	}

	/*
	 * Invalidate cached translations associated with 'vm->iommu' since
	 * we have now moved some pages from it.
	 */
	iommu_invalidate_tlb(vm->iommu);

	bzero(seg, sizeof(struct vm_memory_segment));
}

void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	for (i = 0; i < vm->num_mem_segs; i++)
		vm_free_mem_seg(vm, &vm->mem_segs[i]);

	vm->num_mem_segs = 0;

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(&vm->vcpu[i]);

	iommu_destroy_domain(vm->iommu);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
			   VM_PROT_RW, spok));
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
			   VM_PROT_NONE, spok));
}

/*
 * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise
 */
static boolean_t
vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	if (gpa & PAGE_MASK)
		panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (FALSE);
	}

	return (TRUE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int error, available, allocated;
	struct vm_memory_segment *seg;
	vm_paddr_t g, hpa;
	void *host_domain;

	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_gpa_available(vm, g))
			available++;
		else
			allocated++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	host_domain = iommu_host_domain();

	seg = &vm->mem_segs[vm->num_mem_segs];

	error = 0;
	seg->gpa = gpa;
	seg->len = 0;
	while (seg->len < len) {
		hpa = vmm_mem_alloc(PAGE_SIZE);
		if (hpa == 0) {
			error = ENOMEM;
			break;
		}

		error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
				   VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
		if (error)
			break;

		/*
		 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'.
		 * Add mapping for 'gpa + seg->len' to 'hpa' in the VM's domain.
		 */
		iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
		iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);

		seg->len += PAGE_SIZE;
	}

	if (error) {
		vm_free_mem_seg(vm, seg);
		return (error);
	}

	/*
	 * Invalidate cached translations associated with 'host_domain' since
	 * we have now moved some pages from it.
	 */
	iommu_invalidate_tlb(host_domain);

	vm->num_mem_segs++;

	return (0);
}

vm_paddr_t
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	vm_paddr_t nextpage;

	nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
	if (len > nextpage - gpa)
		panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	return (VMMMAP_GET(vm->cookie, gpa));
}

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
		  struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			*seg = vm->mem_segs[i];
			return (0);
		}
	}
	return (-1);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid, sleepticks, t;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];
	vme = &vmrun->vm_exit;
	rip = vmrun->rip;
restart:
	critical_enter();

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);

	vcpu->hostcpu = curcpu;
	error = VMRUN(vm->cookie, vcpuid, rip);
	vcpu->hostcpu = NOCPU;

	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	/* copy the exit information */
	bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit));

	critical_exit();

	/*
	 * Oblige the guest's desire to 'hlt' by sleeping until the vcpu
	 * is ready to run.
	 */
	if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) {
		vcpu_lock(vcpu);

		/*
		 * Figure out the number of host ticks until the next apic
		 * timer interrupt in the guest.
		 */
		sleepticks = lapic_timer_tick(vm, vcpuid);

		/*
		 * If the guest local apic timer is disabled then sleep for
		 * a long time but not forever.
		 */
		if (sleepticks < 0)
			sleepticks = hz;

		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep.
		 *
		 * These interrupts could have happened any time after we
		 * returned from VMRUN() and before we grabbed the vcpu lock.
		 */
		if (!vm_nmi_pending(vm, vcpuid) &&
		    lapic_pending_intr(vm, vcpuid) < 0) {
			if (sleepticks <= 0)
				panic("invalid sleepticks %d", sleepticks);
			t = ticks;
			msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
			vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
		}

		vcpu_unlock(vcpu);

		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	return (error);
}

int
vm_inject_event(struct vm *vm, int vcpuid, int type,
		int vector, uint32_t code, int code_valid)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
		return (EINVAL);

	if (vector < 0 || vector > 255)
		return (EINVAL);

	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
}

static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->nmi_pending = 1;
	vm_interrupt_hostcpu(vm, vcpuid);
	return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->nmi_pending == 0)
		panic("vm_nmi_clear: inconsistent nmi_pending state");

	vcpu->nmi_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}

int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMSETCAP(vm->cookie, vcpu, type, val));
}

uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].guest_msrs);
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].vlapic);
}

boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, i, n;
	int b, s, f;
	char *val, *cp, *cp2;

	/*
	 * XXX
	 * The length of an environment variable is limited to 128 bytes which
	 * puts an upper limit on the number of passthru devices that may be
	 * specified using a single environment variable.
	 *
	 * Work around this by scanning multiple environment variable
	 * names instead of a single one - yuck!
	 */
	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };

	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
	found = 0;
	for (i = 0; names[i] != NULL && !found; i++) {
		cp = val = getenv(names[i]);
		while (cp != NULL && *cp != '\0') {
			if ((cp2 = strchr(cp, ' ')) != NULL)
				*cp2 = '\0';

			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
			if (n == 3 && bus == b && slot == s && func == f) {
				found = 1;
				break;
			}

			if (cp2 != NULL)
				*cp2++ = ' ';

			cp = cp2;
		}
		freeenv(val);
	}
	return (found);
}

void *
vm_iommu_domain(struct vm *vm)
{

	return (vm->iommu);
}

int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
{
	int error;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);

	/*
	 * The following state transitions are allowed:
	 * IDLE -> RUNNING -> IDLE
	 * IDLE -> CANNOT_RUN -> IDLE
	 */
	if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
	    (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
		error = 0;
		vcpu->state = state;
	} else {
		error = EBUSY;
	}

	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;
	enum vcpu_state state;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	state = vcpu->state;
	vcpu_unlock(vcpu);

	return (state);
}

void
vm_activate_cpu(struct vm *vm, int vcpuid)
{

	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
		CPU_SET(vcpuid, &vm->active_cpus);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

void *
vcpu_stats(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid].stats);
}

int
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	*state = vm->vcpu[vcpuid].x2apic_state;

	return (0);
}

int
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (state >= X2APIC_STATE_LAST)
		return (EINVAL);

	vm->vcpu[vcpuid].x2apic_state = state;

	vlapic_set_x2apic_state(vm, vcpuid, state);

	return (0);
}

void
vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (hostcpu == NOCPU) {
		/*
		 * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then
		 * the host thread must be sleeping waiting for an event to
		 * kick the vcpu out of 'hlt'.
		 *
		 * XXX this is racy because the condition exists right before
		 * and after calling VMRUN() in vm_run(). The wakeup() is
		 * benign in this case.
		 */
		if (vcpu->state == VCPU_RUNNING)
			wakeup_one(vcpu);
	} else {
		if (vcpu->state != VCPU_RUNNING)
			panic("invalid vcpu state %d", vcpu->state);
		if (hostcpu != curcpu)
			ipi_cpu(hostcpu, vmm_ipinum);
	}
	vcpu_unlock(vcpu);
}