/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "iodev.h"

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
#include <linux/profile.h>
#include <linux/kvm_para.h>
#include <linux/pagemap.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/compat.h>

#include <asm/processor.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm-generic/bitops/le.h>

#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
#endif

#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

/*
 * Ordering of locks:
 *
 *	kvm->slots_lock --> kvm->lock --> kvm->irq_lock
 */

DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);

static cpumask_var_t cpus_hardware_enabled;
static int kvm_usage_count = 0;
static atomic_t hardware_enable_failed;

struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);

static __read_mostly struct preempt_ops kvm_preempt_ops;

struct dentry *kvm_debugfs_dir;

static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
			   unsigned long arg);
static int hardware_enable_all(void);
static void hardware_disable_all(void);

static bool kvm_rebooting;

static bool largepages_enabled = true;

inline int kvm_is_mmio_pfn(pfn_t pfn)
{
	if (pfn_valid(pfn)) {
		struct page *page = compound_head(pfn_to_page(pfn));
		return PageReserved(page);
	}

	return true;
}

/*
 * Switches to specified vcpu, until a matching vcpu_put()
 */
void vcpu_load(struct kvm_vcpu *vcpu)
{
	int cpu;

	mutex_lock(&vcpu->mutex);
	cpu = get_cpu();
	preempt_notifier_register(&vcpu->preempt_notifier);
	kvm_arch_vcpu_load(vcpu, cpu);
	put_cpu();
}

void vcpu_put(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	kvm_arch_vcpu_put(vcpu);
	preempt_notifier_unregister(&vcpu->preempt_notifier);
	preempt_enable();
	mutex_unlock(&vcpu->mutex);
}

static void ack_flush(void *_completed)
{
}

static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
	int i, cpu, me;
	cpumask_var_t cpus;
	bool called = true;
	struct kvm_vcpu *vcpu;

	zalloc_cpumask_var(&cpus, GFP_ATOMIC);

	spin_lock(&kvm->requests_lock);
	me = smp_processor_id();
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_and_set_bit(req, &vcpu->requests))
			continue;
		cpu = vcpu->cpu;
		if (cpus != NULL && cpu != -1 && cpu != me)
			cpumask_set_cpu(cpu, cpus);
	}
	if (unlikely(cpus == NULL))
		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
	else if (!cpumask_empty(cpus))
		smp_call_function_many(cpus, ack_flush, NULL, 1);
	else
		called = false;
	spin_unlock(&kvm->requests_lock);
	free_cpumask_var(cpus);
	return called;
}
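
/*
 * Request a TLB flush from every vcpu: set KVM_REQ_TLB_FLUSH on each
 * vcpu and IPI the CPUs that are currently running one, so the flush
 * happens before any stale translation can be used again.
 */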
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
		++kvm->stat.remote_tlb_flush;
}

void kvm_reload_remote_mmus(struct kvm *kvm)
{
	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}

int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
	struct page *page;
	int r;

	mutex_init(&vcpu->mutex);
	vcpu->cpu = -1;
	vcpu->kvm = kvm;
	vcpu->vcpu_id = id;
	init_waitqueue_head(&vcpu->wq);

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail;
	}
	vcpu->run = page_address(page);

	r = kvm_arch_vcpu_init(vcpu);
	if (r < 0)
		goto fail_free_run;
	return 0;

fail_free_run:
	free_page((unsigned long)vcpu->run);
fail:
	return r;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_init);

void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_uninit(vcpu);
	free_page((unsigned long)vcpu->run);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
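
/*
 * MMU notifier hooks: when the host changes or tears down a mapping that
 * backs guest memory, these callbacks zap the affected sptes (flushing
 * remote TLBs where needed) and bump mmu_notifier_seq/mmu_notifier_count
 * so that concurrent kvm page faults notice the change and retry.
 */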
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
{
	return container_of(mn, struct kvm, mmu_notifier);
}

static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
					     struct mm_struct *mm,
					     unsigned long address)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int need_tlb_flush;

	/*
	 * When ->invalidate_page runs, the linux pte has been zapped
	 * already but the page is still allocated until
	 * ->invalidate_page returns. So if we increase the sequence
	 * here the kvm page fault will notice if the spte can't be
	 * established because the page is going to be freed. If
	 * instead the kvm page fault establishes the spte before
	 * ->invalidate_page runs, kvm_unmap_hva will release it
	 * before returning.
	 *
	 * The sequence increase only needs to be seen at spin_unlock
	 * time, and not at spin_lock time.
	 *
	 * Increasing the sequence after the spin_unlock would be
	 * unsafe because the kvm page fault could then establish the
	 * pte after kvm_unmap_hva returned, without noticing the page
	 * is going to be freed.
	 */
	spin_lock(&kvm->mmu_lock);
	kvm->mmu_notifier_seq++;
	need_tlb_flush = kvm_unmap_hva(kvm, address);
	spin_unlock(&kvm->mmu_lock);

	/* we have to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);

}

static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long address,
					pte_t pte)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);

	spin_lock(&kvm->mmu_lock);
	kvm->mmu_notifier_seq++;
	kvm_set_spte_hva(kvm, address, pte);
	spin_unlock(&kvm->mmu_lock);
}

static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
						    struct mm_struct *mm,
						    unsigned long start,
						    unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int need_tlb_flush = 0;

	spin_lock(&kvm->mmu_lock);
	/*
	 * The count increase must become visible at unlock time as no
	 * spte can be established without taking the mmu_lock and
	 * count is also read inside the mmu_lock critical section.
	 */
	kvm->mmu_notifier_count++;
	for (; start < end; start += PAGE_SIZE)
		need_tlb_flush |= kvm_unmap_hva(kvm, start);
	spin_unlock(&kvm->mmu_lock);

	/* we have to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);
}

static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
						  struct mm_struct *mm,
						  unsigned long start,
						  unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);

	spin_lock(&kvm->mmu_lock);
	/*
	 * This sequence increase will notify the kvm page fault that
	 * the page that is going to be mapped in the spte could have
	 * been freed.
	 */
	kvm->mmu_notifier_seq++;
	/*
	 * The above sequence increase must be visible before the
	 * below count decrease, but both values are read by the kvm
	 * page fault under the mmu_lock spinlock, so we don't need to
	 * add an smp_wmb() here in between the two.
	 */
	kvm->mmu_notifier_count--;
	spin_unlock(&kvm->mmu_lock);

	BUG_ON(kvm->mmu_notifier_count < 0);
}

static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
					      struct mm_struct *mm,
					      unsigned long address)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int young;

	spin_lock(&kvm->mmu_lock);
	young = kvm_age_hva(kvm, address);
	spin_unlock(&kvm->mmu_lock);

	if (young)
		kvm_flush_remote_tlbs(kvm);

	return young;
}

static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
				     struct mm_struct *mm)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	kvm_arch_flush_shadow(kvm);
}

static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
	.change_pte		= kvm_mmu_notifier_change_pte,
	.release		= kvm_mmu_notifier_release,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
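
/*
 * Allocate and initialize a VM: arch state, the hardware-enable
 * refcount, buses, locks and (optionally) the coalesced MMIO ring and
 * MMU notifier, then link the new VM into the global vm_list.
 */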
static struct kvm *kvm_create_vm(void)
{
	int r = 0;
	struct kvm *kvm = kvm_arch_create_vm();
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	struct page *page;
#endif

	if (IS_ERR(kvm))
		goto out;

	r = hardware_enable_all();
	if (r)
		goto out_err_nodisable;

#ifdef CONFIG_HAVE_KVM_IRQCHIP
	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif

#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto out_err;
	}
	kvm->coalesced_mmio_ring =
			(struct kvm_coalesced_mmio_ring *)page_address(page);
#endif

#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
	{
		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
		if (r) {
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
			put_page(page);
#endif
			goto out_err;
		}
	}
#endif

	kvm->mm = current->mm;
	atomic_inc(&kvm->mm->mm_count);
	spin_lock_init(&kvm->mmu_lock);
	spin_lock_init(&kvm->requests_lock);
	kvm_io_bus_init(&kvm->pio_bus);
	kvm_eventfd_init(kvm);
	mutex_init(&kvm->lock);
	mutex_init(&kvm->irq_lock);
	kvm_io_bus_init(&kvm->mmio_bus);
	init_rwsem(&kvm->slots_lock);
	atomic_set(&kvm->users_count, 1);
	spin_lock(&kvm_lock);
	list_add(&kvm->vm_list, &vm_list);
	spin_unlock(&kvm_lock);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	kvm_coalesced_mmio_init(kvm);
#endif
out:
	return kvm;

out_err:
	hardware_disable_all();
out_err_nodisable:
	kfree(kvm);
	return ERR_PTR(r);
}

/*
 * Free any memory in @free but not in @dont.
 */
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
				  struct kvm_memory_slot *dont)
{
	int i;

	if (!dont || free->rmap != dont->rmap)
		vfree(free->rmap);

	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
		vfree(free->dirty_bitmap);

	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
		if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
			vfree(free->lpage_info[i]);
			free->lpage_info[i] = NULL;
		}
	}

	free->npages = 0;
	free->dirty_bitmap = NULL;
	free->rmap = NULL;
}

void kvm_free_physmem(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i)
		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
}

static void kvm_destroy_vm(struct kvm *kvm)
{
	struct mm_struct *mm = kvm->mm;

	kvm_arch_sync_events(kvm);
	spin_lock(&kvm_lock);
	list_del(&kvm->vm_list);
	spin_unlock(&kvm_lock);
	kvm_free_irq_routing(kvm);
	kvm_io_bus_destroy(&kvm->pio_bus);
	kvm_io_bus_destroy(&kvm->mmio_bus);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	if (kvm->coalesced_mmio_ring != NULL)
		free_page((unsigned long)kvm->coalesced_mmio_ring);
#endif
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#else
	kvm_arch_flush_shadow(kvm);
#endif
	kvm_arch_destroy_vm(kvm);
	hardware_disable_all();
	mmdrop(mm);
}

void kvm_get_kvm(struct kvm *kvm)
{
	atomic_inc(&kvm->users_count);
}
EXPORT_SYMBOL_GPL(kvm_get_kvm);

void kvm_put_kvm(struct kvm *kvm)
{
	if (atomic_dec_and_test(&kvm->users_count))
		kvm_destroy_vm(kvm);
}
EXPORT_SYMBOL_GPL(kvm_put_kvm);

static int kvm_vm_release(struct inode *inode, struct file *filp)
{
	struct kvm *kvm = filp->private_data;

	kvm_irqfd_release(kvm);

	kvm_put_kvm(kvm);
	return 0;
}

/*
 * Allocate some memory and give it an address in the guest physical address
 * space.
 *
 * Discontiguous memory is allowed, mostly for framebuffers.
 *
 * Must be called holding mmap_sem for write.
 */
int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc)
{
	int r;
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long i;
	struct kvm_memory_slot *memslot;
	struct kvm_memory_slot old, new;

	r = -EINVAL;
	/* General sanity checks */
	if (mem->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
		goto out;
	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
		goto out;
	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
		goto out;

	memslot = &kvm->memslots[mem->slot];
	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
	npages = mem->memory_size >> PAGE_SHIFT;

	if (!npages)
		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;

	new = old = *memslot;

	new.base_gfn = base_gfn;
	new.npages = npages;
	new.flags = mem->flags;

	/* Disallow changing a memory slot's size. */
	r = -EINVAL;
	if (npages && old.npages && npages != old.npages)
		goto out_free;

	/* Check for overlaps */
	r = -EEXIST;
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *s = &kvm->memslots[i];

		if (s == memslot || !s->npages)
			continue;
		if (!((base_gfn + npages <= s->base_gfn) ||
		      (base_gfn >= s->base_gfn + s->npages)))
			goto out_free;
	}

	/* Free page dirty bitmap if unneeded */
	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
		new.dirty_bitmap = NULL;

	r = -ENOMEM;

	/* Allocate if a slot is being created */
#ifndef CONFIG_S390
	if (npages && !new.rmap) {
		new.rmap = vmalloc(npages * sizeof(struct page *));

		if (!new.rmap)
			goto out_free;

		memset(new.rmap, 0, npages * sizeof(*new.rmap));

		new.user_alloc = user_alloc;
		/*
		 * hva_to_rmmap() serializes with the mmu_lock and to be
		 * safe it has to ignore memslots with !user_alloc &&
		 * !userspace_addr.
		 */
		if (user_alloc)
			new.userspace_addr = mem->userspace_addr;
		else
			new.userspace_addr = 0;
	}
	if (!npages)
		goto skip_lpage;

	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
		unsigned long ugfn;
		unsigned long j;
		int lpages;
		int level = i + 2;

		/* Avoid unused variable warning if no large pages */
		(void)level;

		if (new.lpage_info[i])
			continue;

		lpages = 1 + (base_gfn + npages - 1) /
			     KVM_PAGES_PER_HPAGE(level);
		lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);

		new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));

		if (!new.lpage_info[i])
			goto out_free;

		memset(new.lpage_info[i], 0,
		       lpages * sizeof(*new.lpage_info[i]));

		if (base_gfn % KVM_PAGES_PER_HPAGE(level))
			new.lpage_info[i][0].write_count = 1;
		if ((base_gfn + npages) % KVM_PAGES_PER_HPAGE(level))
			new.lpage_info[i][lpages - 1].write_count = 1;
		ugfn = new.userspace_addr >> PAGE_SHIFT;
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, or if explicitly asked to, disable large page
		 * support for this slot
		 */
		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
		    !largepages_enabled)
			for (j = 0; j < lpages; ++j)
				new.lpage_info[i][j].write_count = 1;
	}

skip_lpage:

	/* Allocate page dirty bitmap if needed */
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;

		new.dirty_bitmap = vmalloc(dirty_bytes);
		if (!new.dirty_bitmap)
			goto out_free;
		memset(new.dirty_bitmap, 0, dirty_bytes);
		if (old.npages)
			kvm_arch_flush_shadow(kvm);
	}
#else  /* not defined CONFIG_S390 */
	new.user_alloc = user_alloc;
	if (user_alloc)
		new.userspace_addr = mem->userspace_addr;
#endif /* not defined CONFIG_S390 */

	if (!npages)
		kvm_arch_flush_shadow(kvm);

	spin_lock(&kvm->mmu_lock);
	if (mem->slot >= kvm->nmemslots)
		kvm->nmemslots = mem->slot + 1;

	*memslot = new;
	spin_unlock(&kvm->mmu_lock);

	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
	if (r) {
		spin_lock(&kvm->mmu_lock);
		*memslot = old;
		spin_unlock(&kvm->mmu_lock);
		goto out_free;
	}

	kvm_free_physmem_slot(&old, npages ? &new : NULL);
	/* Slot deletion case: we have to update the current slot */
	spin_lock(&kvm->mmu_lock);
	if (!npages)
		*memslot = old;
	spin_unlock(&kvm->mmu_lock);
#ifdef CONFIG_DMAR
	/* map the pages in iommu page table */
	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
	if (r)
		goto out;
#endif
	return 0;

out_free:
	kvm_free_physmem_slot(&new, &old);
out:
	return r;

}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
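
/*
 * Locked wrapper around __kvm_set_memory_region(): takes slots_lock for
 * write; use this variant unless the caller already holds the lock.
 */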
int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc)
{
	int r;

	down_write(&kvm->slots_lock);
	r = __kvm_set_memory_region(kvm, mem, user_alloc);
	up_write(&kvm->slots_lock);
	return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);

int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   int user_alloc)
{
	if (mem->slot >= KVM_MEMORY_SLOTS)
		return -EINVAL;
	return kvm_set_memory_region(kvm, mem, user_alloc);
}

int kvm_get_dirty_log(struct kvm *kvm,
		      struct kvm_dirty_log *log, int *is_dirty)
{
	struct kvm_memory_slot *memslot;
	int r, i;
	int n;
	unsigned long any = 0;

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
		goto out;

	memslot = &kvm->memslots[log->slot];
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;

	for (i = 0; !any && i < n/sizeof(long); ++i)
		any = memslot->dirty_bitmap[i];

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	if (any)
		*is_dirty = 1;

	r = 0;
out:
	return r;
}

void kvm_disable_largepages(void)
{
	largepages_enabled = false;
}
EXPORT_SYMBOL_GPL(kvm_disable_largepages);

int is_error_page(struct page *page)
{
	return page == bad_page;
}
EXPORT_SYMBOL_GPL(is_error_page);

int is_error_pfn(pfn_t pfn)
{
	return pfn == bad_pfn;
}
EXPORT_SYMBOL_GPL(is_error_pfn);

static inline unsigned long bad_hva(void)
{
	return PAGE_OFFSET;
}

int kvm_is_error_hva(unsigned long addr)
{
	return addr == bad_hva();
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);

struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return memslot;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);

struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	gfn = unalias_gfn(kvm, gfn);
	return gfn_to_memslot_unaliased(kvm, gfn);
}

int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;

	gfn = unalias_gfn(kvm, gfn);
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;

	gfn = unalias_gfn(kvm, gfn);
	slot = gfn_to_memslot_unaliased(kvm, gfn);
	if (!slot)
		return bad_hva();
	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
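
/*
 * Translate a gfn to a host pfn: fast gup for ordinary pages, with a
 * VM_PFNMAP fallback for MMIO-style mappings; returns the pfn of
 * bad_page when the gfn has no valid host mapping.
 */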
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
	struct page *page[1];
	unsigned long addr;
	int npages;
	pfn_t pfn;

	might_sleep();

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr)) {
		get_page(bad_page);
		return page_to_pfn(bad_page);
	}

	npages = get_user_pages_fast(addr, 1, 1, page);

	if (unlikely(npages != 1)) {
		struct vm_area_struct *vma;

		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, addr);

		if (vma == NULL || addr < vma->vm_start ||
		    !(vma->vm_flags & VM_PFNMAP)) {
			up_read(&current->mm->mmap_sem);
			get_page(bad_page);
			return page_to_pfn(bad_page);
		}

		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
		up_read(&current->mm->mmap_sem);
		BUG_ON(!kvm_is_mmio_pfn(pfn));
	} else
		pfn = page_to_pfn(page[0]);

	return pfn;
}

EXPORT_SYMBOL_GPL(gfn_to_pfn);

struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	pfn_t pfn;

	pfn = gfn_to_pfn(kvm, gfn);
	if (!kvm_is_mmio_pfn(pfn))
		return pfn_to_page(pfn);

	WARN_ON(kvm_is_mmio_pfn(pfn));

	get_page(bad_page);
	return bad_page;
}

EXPORT_SYMBOL_GPL(gfn_to_page);

void kvm_release_page_clean(struct page *page)
{
	kvm_release_pfn_clean(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_clean);

void kvm_release_pfn_clean(pfn_t pfn)
{
	if (!kvm_is_mmio_pfn(pfn))
		put_page(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);

void kvm_release_page_dirty(struct page *page)
{
	kvm_release_pfn_dirty(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);

void kvm_release_pfn_dirty(pfn_t pfn)
{
	kvm_set_pfn_dirty(pfn);
	kvm_release_pfn_clean(pfn);
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);

void kvm_set_page_dirty(struct page *page)
{
	kvm_set_pfn_dirty(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_set_page_dirty);

void kvm_set_pfn_dirty(pfn_t pfn)
{
	if (!kvm_is_mmio_pfn(pfn)) {
		struct page *page = pfn_to_page(pfn);
		if (!PageReserved(page))
			SetPageDirty(page);
	}
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);

void kvm_set_pfn_accessed(pfn_t pfn)
{
	if (!kvm_is_mmio_pfn(pfn))
		mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);

void kvm_get_pfn(pfn_t pfn)
{
	if (!kvm_is_mmio_pfn(pfn))
		get_page(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_get_pfn);

static int next_segment(unsigned long len, int offset)
{
	if (len > PAGE_SIZE - offset)
		return PAGE_SIZE - offset;
	else
		return len;
}

int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
			int len)
{
	int r;
	unsigned long addr;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return -EFAULT;
	r = copy_from_user(data, (void __user *)addr + offset, len);
	if (r)
		return -EFAULT;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);
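
/*
 * Read a possibly page-crossing range of guest memory by looping over
 * kvm_read_guest_page() one page-sized segment at a time.
 */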
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		data += seg;
		++gfn;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest);

int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
			  unsigned long len)
{
	int r;
	unsigned long addr;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int offset = offset_in_page(gpa);

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return -EFAULT;
	pagefault_disable();
	r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
	pagefault_enable();
	if (r)
		return -EFAULT;
	return 0;
}
EXPORT_SYMBOL(kvm_read_guest_atomic);

int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
			 int offset, int len)
{
	int r;
	unsigned long addr;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return -EFAULT;
	r = copy_to_user((void __user *)addr + offset, data, len);
	if (r)
		return -EFAULT;
	mark_page_dirty(kvm, gfn);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);

int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
		    unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		data += seg;
		++gfn;
	}
	return 0;
}

int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
	return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);

int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		++gfn;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
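
/*
 * Mark a gfn dirty in its slot's dirty bitmap (if dirty logging is
 * enabled for that slot); the bit is tested first to avoid a needless
 * read-modify-write of a shared cache line.
 */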
void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *memslot;

	gfn = unalias_gfn(kvm, gfn);
	memslot = gfn_to_memslot_unaliased(kvm, gfn);
	if (memslot && memslot->dirty_bitmap) {
		unsigned long rel_gfn = gfn - memslot->base_gfn;

		/* avoid RMW */
		if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
			generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
	}
}

/*
 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
 */
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);

		if (kvm_arch_vcpu_runnable(vcpu)) {
			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
			break;
		}
		if (kvm_cpu_has_pending_timer(vcpu))
			break;
		if (signal_pending(current))
			break;

		schedule();
	}

	finish_wait(&vcpu->wq, &wait);
}

void kvm_resched(struct kvm_vcpu *vcpu)
{
	if (!need_resched())
		return;
	cond_resched();
}
EXPORT_SYMBOL_GPL(kvm_resched);

void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
{
	ktime_t expires;
	DEFINE_WAIT(wait);

	prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);

	/* Sleep for 100 us, and hope lock-holder got scheduled */
	expires = ktime_add_ns(ktime_get(), 100000UL);
	schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);

	finish_wait(&vcpu->wq, &wait);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);

static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvm_vcpu *vcpu = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff == 0)
		page = virt_to_page(vcpu->run);
#ifdef CONFIG_X86
	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
		page = virt_to_page(vcpu->arch.pio_data);
#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
#endif
	else
		return VM_FAULT_SIGBUS;
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_vcpu_vm_ops = {
	.fault = kvm_vcpu_fault,
};

static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_vcpu_vm_ops;
	return 0;
}

static int kvm_vcpu_release(struct inode *inode, struct file *filp)
{
	struct kvm_vcpu *vcpu = filp->private_data;

	kvm_put_kvm(vcpu->kvm);
	return 0;
}

static struct file_operations kvm_vcpu_fops = {
	.release        = kvm_vcpu_release,
	.unlocked_ioctl = kvm_vcpu_ioctl,
	.compat_ioctl   = kvm_vcpu_ioctl,
	.mmap           = kvm_vcpu_mmap,
};

/*
 * Allocates an inode for the vcpu.
 */
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
}

/*
 * Creates some virtual cpus.  Good luck creating more than one.
 */
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
{
	int r;
	struct kvm_vcpu *vcpu, *v;

	vcpu = kvm_arch_vcpu_create(kvm, id);
	if (IS_ERR(vcpu))
		return PTR_ERR(vcpu);

	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);

	r = kvm_arch_vcpu_setup(vcpu);
	if (r)
		return r;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
		r = -EINVAL;
		goto vcpu_destroy;
	}

	kvm_for_each_vcpu(r, v, kvm)
		if (v->vcpu_id == id) {
			r = -EEXIST;
			goto vcpu_destroy;
		}

	BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);

	/* Now it's all set up, let userspace reach it */
	kvm_get_kvm(kvm);
	r = create_vcpu_fd(vcpu);
	if (r < 0) {
		kvm_put_kvm(kvm);
		goto vcpu_destroy;
	}

	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
	smp_wmb();
	atomic_inc(&kvm->online_vcpus);

#ifdef CONFIG_KVM_APIC_ARCHITECTURE
	if (kvm->bsp_vcpu_id == id)
		kvm->bsp_vcpu = vcpu;
#endif
	mutex_unlock(&kvm->lock);
	return r;

vcpu_destroy:
	mutex_unlock(&kvm->lock);
	kvm_arch_vcpu_destroy(vcpu);
	return r;
}

static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
{
	if (sigset) {
		sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
		vcpu->sigset_active = 1;
		vcpu->sigset = *sigset;
	} else
		vcpu->sigset_active = 0;
	return 0;
}

static long kvm_vcpu_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;
	struct kvm_fpu *fpu = NULL;
	struct kvm_sregs *kvm_sregs = NULL;

	if (vcpu->kvm->mm != current->mm)
		return -EIO;
	switch (ioctl) {
	case KVM_RUN:
		r = -EINVAL;
		if (arg)
			goto out;
		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
		break;
	case KVM_GET_REGS: {
		struct kvm_regs *kvm_regs;

		r = -ENOMEM;
		kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
		if (!kvm_regs)
			goto out;
		r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
		if (r)
			goto out_free1;
		r = -EFAULT;
		if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
			goto out_free1;
		r = 0;
out_free1:
		kfree(kvm_regs);
		break;
	}
	case KVM_SET_REGS: {
		struct kvm_regs *kvm_regs;

		r = -ENOMEM;
		kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
		if (!kvm_regs)
			goto out;
		r = -EFAULT;
		if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
			goto out_free2;
		r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
		if (r)
			goto out_free2;
		r = 0;
out_free2:
		kfree(kvm_regs);
		break;
	}
	case KVM_GET_SREGS: {
		kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
		r = -ENOMEM;
		if (!kvm_sregs)
			goto out;
		r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_SREGS: {
		kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
		r = -ENOMEM;
		if (!kvm_sregs)
			goto out;
		r = -EFAULT;
		if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_GET_MP_STATE: {
		struct kvm_mp_state mp_state;

		r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &mp_state, sizeof mp_state))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_MP_STATE: {
		struct kvm_mp_state mp_state;

		r = -EFAULT;
		if (copy_from_user(&mp_state, argp, sizeof mp_state))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_TRANSLATE: {
		struct kvm_translation tr;

		r = -EFAULT;
		if (copy_from_user(&tr, argp, sizeof tr))
			goto out;
		r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &tr, sizeof tr))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_GUEST_DEBUG: {
		struct kvm_guest_debug dbg;

		r = -EFAULT;
		if (copy_from_user(&dbg, argp, sizeof dbg))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_SIGNAL_MASK: {
		struct kvm_signal_mask __user *sigmask_arg = argp;
		struct kvm_signal_mask kvm_sigmask;
		sigset_t sigset, *p;

		p = NULL;
		if (argp) {
			r = -EFAULT;
			if (copy_from_user(&kvm_sigmask, argp,
					   sizeof kvm_sigmask))
				goto out;
			r = -EINVAL;
			if (kvm_sigmask.len != sizeof sigset)
				goto out;
			r = -EFAULT;
			if (copy_from_user(&sigset, sigmask_arg->sigset,
					   sizeof sigset))
				goto out;
			p = &sigset;
		}
		/* a NULL argp clears the sigmask */
		r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
		break;
	}
	case KVM_GET_FPU: {
		fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
		r = -ENOMEM;
		if (!fpu)
			goto out;
		r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_FPU: {
		fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
		r = -ENOMEM;
		if (!fpu)
			goto out;
		r = -EFAULT;
		if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
		if (r)
			goto out;
		r = 0;
		break;
	}
	default:
		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
	}
out:
	kfree(fpu);
	kfree(kvm_sregs);
	return r;
}

static long kvm_vm_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	if (kvm->mm != current->mm)
		return -EIO;
	switch (ioctl) {
	case KVM_CREATE_VCPU:
		r = kvm_vm_ioctl_create_vcpu(kvm, arg);
		if (r < 0)
			goto out;
		break;
	case KVM_SET_USER_MEMORY_REGION: {
		struct kvm_userspace_memory_region kvm_userspace_mem;

		r = -EFAULT;
		if (copy_from_user(&kvm_userspace_mem, argp,
				   sizeof kvm_userspace_mem))
			goto out;

		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
		if (r)
			goto out;
		break;
	}
	case KVM_GET_DIRTY_LOG: {
		struct kvm_dirty_log log;

		r = -EFAULT;
		if (copy_from_user(&log, argp, sizeof log))
			goto out;
		r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
		if (r)
			goto out;
		break;
	}
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	case KVM_REGISTER_COALESCED_MMIO: {
		struct kvm_coalesced_mmio_zone zone;
		r = -EFAULT;
		if (copy_from_user(&zone, argp, sizeof zone))
			goto out;
		r = -ENXIO;
		r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_UNREGISTER_COALESCED_MMIO: {
		struct kvm_coalesced_mmio_zone zone;
		r = -EFAULT;
		if (copy_from_user(&zone, argp, sizeof zone))
			goto out;
		r = -ENXIO;
		r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
		if (r)
			goto out;
		r = 0;
		break;
	}
#endif
	case KVM_IRQFD: {
		struct kvm_irqfd data;

		r = -EFAULT;
		if (copy_from_user(&data, argp, sizeof data))
			goto out;
		r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
		break;
	}
	case KVM_IOEVENTFD: {
		struct kvm_ioeventfd data;

		r = -EFAULT;
		if (copy_from_user(&data, argp, sizeof data))
			goto out;
		r = kvm_ioeventfd(kvm, &data);
		break;
	}
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
	case KVM_SET_BOOT_CPU_ID:
		r = 0;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) != 0)
			r = -EBUSY;
		else
			kvm->bsp_vcpu_id = arg;
		mutex_unlock(&kvm->lock);
		break;
#endif
	default:
		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
		if (r == -ENOTTY)
			r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
	}
out:
	return r;
}

#ifdef CONFIG_COMPAT
struct compat_kvm_dirty_log {
	__u32 slot;
	__u32 padding1;
	union {
		compat_uptr_t dirty_bitmap; /* one bit per page */
		__u64 padding2;
	};
};

static long kvm_vm_compat_ioctl(struct file *filp,
				unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	int r;

	if (kvm->mm != current->mm)
		return -EIO;
	switch (ioctl) {
	case KVM_GET_DIRTY_LOG: {
		struct compat_kvm_dirty_log compat_log;
		struct kvm_dirty_log log;

		r = -EFAULT;
		if (copy_from_user(&compat_log, (void __user *)arg,
				   sizeof(compat_log)))
			goto out;
		log.slot	 = compat_log.slot;
		log.padding1	 = compat_log.padding1;
		log.padding2	 = compat_log.padding2;
		log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);

		r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
		if (r)
			goto out;
		break;
	}
	default:
		r = kvm_vm_ioctl(filp, ioctl, arg);
	}

out:
	return r;
}
#endif

static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page[1];
	unsigned long addr;
	int npages;
	gfn_t gfn = vmf->pgoff;
	struct kvm *kvm = vma->vm_file->private_data;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return VM_FAULT_SIGBUS;

	npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
				NULL);
	if (unlikely(npages != 1))
		return VM_FAULT_SIGBUS;

	vmf->page = page[0];
	return 0;
}

static const struct vm_operations_struct kvm_vm_vm_ops = {
	.fault = kvm_vm_fault,
};

static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_vm_vm_ops;
	return 0;
}

static struct file_operations kvm_vm_fops = {
	.release        = kvm_vm_release,
	.unlocked_ioctl = kvm_vm_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl   = kvm_vm_compat_ioctl,
#endif
	.mmap           = kvm_vm_mmap,
};

static int kvm_dev_ioctl_create_vm(void)
{
	int fd;
	struct kvm *kvm;

	kvm = kvm_create_vm();
	if (IS_ERR(kvm))
		return PTR_ERR(kvm);
	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0);
	if (fd < 0)
		kvm_put_kvm(kvm);

	return fd;
}

static long kvm_dev_ioctl_check_extension_generic(long arg)
{
	switch (arg) {
	case KVM_CAP_USER_MEMORY:
	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
	case KVM_CAP_SET_BOOT_CPU_ID:
#endif
	case KVM_CAP_INTERNAL_ERROR_DATA:
		return 1;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
	case KVM_CAP_IRQ_ROUTING:
		return KVM_MAX_IRQ_ROUTES;
#endif
	default:
		break;
	}
	return kvm_dev_ioctl_check_extension(arg);
}

static long kvm_dev_ioctl(struct file *filp,
			  unsigned int ioctl, unsigned long arg)
{
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_GET_API_VERSION:
		r = -EINVAL;
		if (arg)
			goto out;
		r = KVM_API_VERSION;
		break;
	case KVM_CREATE_VM:
		r = -EINVAL;
		if (arg)
			goto out;
		r = kvm_dev_ioctl_create_vm();
		break;
	case KVM_CHECK_EXTENSION:
		r = kvm_dev_ioctl_check_extension_generic(arg);
		break;
	case KVM_GET_VCPU_MMAP_SIZE:
		r = -EINVAL;
		if (arg)
			goto out;
		r = PAGE_SIZE;     /* struct kvm_run */
#ifdef CONFIG_X86
		r += PAGE_SIZE;    /* pio data page */
#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
		r += PAGE_SIZE;    /* coalesced mmio ring page */
#endif
		break;
	case KVM_TRACE_ENABLE:
	case KVM_TRACE_PAUSE:
	case KVM_TRACE_DISABLE:
		r = -EOPNOTSUPP;
		break;
	default:
		return kvm_arch_dev_ioctl(filp, ioctl, arg);
	}
out:
	return r;
}

static struct file_operations kvm_chardev_ops = {
	.unlocked_ioctl = kvm_dev_ioctl,
	.compat_ioctl   = kvm_dev_ioctl,
};

static struct miscdevice kvm_dev = {
	KVM_MINOR,
	"kvm",
	&kvm_chardev_ops,
};

static void hardware_enable(void *junk)
{
	int cpu = raw_smp_processor_id();
	int r;

	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
		return;

	cpumask_set_cpu(cpu, cpus_hardware_enabled);

	r = kvm_arch_hardware_enable(NULL);

	if (r) {
		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
		atomic_inc(&hardware_enable_failed);
		printk(KERN_INFO "kvm: enabling virtualization on "
				 "CPU%d failed\n", cpu);
	}
}

static void hardware_disable(void *junk)
{
	int cpu = raw_smp_processor_id();

	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
		return;
	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
	kvm_arch_hardware_disable(NULL);
}

static void hardware_disable_all_nolock(void)
{
	BUG_ON(!kvm_usage_count);

	kvm_usage_count--;
	if (!kvm_usage_count)
		on_each_cpu(hardware_disable, NULL, 1);
}

static void hardware_disable_all(void)
{
	spin_lock(&kvm_lock);
	hardware_disable_all_nolock();
	spin_unlock(&kvm_lock);
}
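
/*
 * Usage-counted enable of hardware virtualization: the first VM created
 * enables it on every online CPU, the last VM destroyed disables it
 * again; if any CPU fails to enable, the whole operation is rolled back.
 */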
static int hardware_enable_all(void)
{
	int r = 0;

	spin_lock(&kvm_lock);

	kvm_usage_count++;
	if (kvm_usage_count == 1) {
		atomic_set(&hardware_enable_failed, 0);
		on_each_cpu(hardware_enable, NULL, 1);

		if (atomic_read(&hardware_enable_failed)) {
			hardware_disable_all_nolock();
			r = -EBUSY;
		}
	}

	spin_unlock(&kvm_lock);

	return r;
}

static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
			   void *v)
{
	int cpu = (long)v;

	if (!kvm_usage_count)
		return NOTIFY_OK;

	val &= ~CPU_TASKS_FROZEN;
	switch (val) {
	case CPU_DYING:
		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
		       cpu);
		hardware_disable(NULL);
		break;
	case CPU_UP_CANCELED:
		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
		       cpu);
		smp_call_function_single(cpu, hardware_disable, NULL, 1);
		break;
	case CPU_ONLINE:
		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
		       cpu);
		smp_call_function_single(cpu, hardware_enable, NULL, 1);
		break;
	}
	return NOTIFY_OK;
}

asmlinkage void kvm_handle_fault_on_reboot(void)
{
	if (kvm_rebooting)
		/* spin while reset goes on */
		while (true)
			;
	/* Fault while not rebooting.  We want the trace. */
	BUG();
}
EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);

static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
		      void *v)
{
	/*
	 * Some (well, at least mine) BIOSes hang on reboot if
	 * in vmx root mode.
	 *
	 * And Intel TXT requires VMX to be off on all CPUs when the
	 * system shuts down.
	 */
	printk(KERN_INFO "kvm: exiting hardware virtualization\n");
	kvm_rebooting = true;
	on_each_cpu(hardware_disable, NULL, 1);
	return NOTIFY_OK;
}

static struct notifier_block kvm_reboot_notifier = {
	.notifier_call = kvm_reboot,
	.priority = 0,
};

void kvm_io_bus_init(struct kvm_io_bus *bus)
{
	memset(bus, 0, sizeof(*bus));
}

void kvm_io_bus_destroy(struct kvm_io_bus *bus)
{
	int i;

	for (i = 0; i < bus->dev_count; i++) {
		struct kvm_io_device *pos = bus->devs[i];

		kvm_iodevice_destructor(pos);
	}
}

/* kvm_io_bus_write - called under kvm->slots_lock */
int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
		     int len, const void *val)
{
	int i;
	for (i = 0; i < bus->dev_count; i++)
		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
			return 0;
	return -EOPNOTSUPP;
}

/* kvm_io_bus_read - called under kvm->slots_lock */
int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
{
	int i;
	for (i = 0; i < bus->dev_count; i++)
		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
			return 0;
	return -EOPNOTSUPP;
}

int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
			    struct kvm_io_device *dev)
{
	int ret;

	down_write(&kvm->slots_lock);
	ret = __kvm_io_bus_register_dev(bus, dev);
	up_write(&kvm->slots_lock);

	return ret;
}

/* An unlocked version. Caller must have write lock on slots_lock. */
int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
			      struct kvm_io_device *dev)
{
	if (bus->dev_count > NR_IOBUS_DEVS-1)
		return -ENOSPC;

	bus->devs[bus->dev_count++] = dev;

	return 0;
}

void kvm_io_bus_unregister_dev(struct kvm *kvm,
			       struct kvm_io_bus *bus,
			       struct kvm_io_device *dev)
{
	down_write(&kvm->slots_lock);
	__kvm_io_bus_unregister_dev(bus, dev);
	up_write(&kvm->slots_lock);
}

/* An unlocked version. Caller must have write lock on slots_lock. */
void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
				 struct kvm_io_device *dev)
{
	int i;

	for (i = 0; i < bus->dev_count; i++)
		if (bus->devs[i] == dev) {
			bus->devs[i] = bus->devs[--bus->dev_count];
			break;
		}
}

static struct notifier_block kvm_cpu_notifier = {
	.notifier_call = kvm_cpu_hotplug,
	.priority = 20, /* must be > scheduler priority */
};

static int vm_stat_get(void *_offset, u64 *val)
{
	unsigned offset = (long)_offset;
	struct kvm *kvm;

	*val = 0;
	spin_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list)
		*val += *(u32 *)((void *)kvm + offset);
	spin_unlock(&kvm_lock);
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");

static int vcpu_stat_get(void *_offset, u64 *val)
{
	unsigned offset = (long)_offset;
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;

	*val = 0;
	spin_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list)
		kvm_for_each_vcpu(i, vcpu, kvm)
			*val += *(u32 *)((void *)vcpu + offset);

	spin_unlock(&kvm_lock);
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");

static const struct file_operations *stat_fops[] = {
	[KVM_STAT_VCPU] = &vcpu_stat_fops,
	[KVM_STAT_VM]   = &vm_stat_fops,
};

static void kvm_init_debug(void)
{
	struct kvm_stats_debugfs_item *p;

	kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
	for (p = debugfs_entries; p->name; ++p)
		p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
						(void *)(long)p->offset,
						stat_fops[p->kind]);
}

static void kvm_exit_debug(void)
{
	struct kvm_stats_debugfs_item *p;

	for (p = debugfs_entries; p->name; ++p)
		debugfs_remove(p->dentry);
	debugfs_remove(kvm_debugfs_dir);
}

static int kvm_suspend(struct sys_device *dev, pm_message_t state)
{
	if (kvm_usage_count)
		hardware_disable(NULL);
	return 0;
}

static int kvm_resume(struct sys_device *dev)
{
	if (kvm_usage_count)
		hardware_enable(NULL);
	return 0;
}

static struct sysdev_class kvm_sysdev_class = {
	.name = "kvm",
	.suspend = kvm_suspend,
	.resume = kvm_resume,
};

static struct sys_device kvm_sysdev = {
	.id = 0,
	.cls = &kvm_sysdev_class,
};

struct page *bad_page;
pfn_t bad_pfn;

static inline
struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
{
	return container_of(pn, struct kvm_vcpu, preempt_notifier);
}

static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
{
	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

	kvm_arch_vcpu_load(vcpu, cpu);
}

static void kvm_sched_out(struct preempt_notifier *pn,
			  struct task_struct *next)
{
	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

	kvm_arch_vcpu_put(vcpu);
}
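
/*
 * Module-wide initialization called by each arch module: sets up arch
 * state, the bad_page marker, the hardware/cpu/reboot/sysdev hooks, the
 * vcpu slab cache, the /dev/kvm misc device, the preempt notifiers and
 * debugfs.  Errors unwind in reverse order of setup.
 */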
int kvm_init(void *opaque, unsigned int vcpu_size,
	     struct module *module)
{
	int r;
	int cpu;

	r = kvm_arch_init(opaque);
	if (r)
		goto out_fail;

	bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (bad_page == NULL) {
		r = -ENOMEM;
		goto out;
	}

	bad_pfn = page_to_pfn(bad_page);

	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
		r = -ENOMEM;
		goto out_free_0;
	}

	r = kvm_arch_hardware_setup();
	if (r < 0)
		goto out_free_0a;

	for_each_online_cpu(cpu) {
		smp_call_function_single(cpu,
				kvm_arch_check_processor_compat,
				&r, 1);
		if (r < 0)
			goto out_free_1;
	}

	r = register_cpu_notifier(&kvm_cpu_notifier);
	if (r)
		goto out_free_2;
	register_reboot_notifier(&kvm_reboot_notifier);

	r = sysdev_class_register(&kvm_sysdev_class);
	if (r)
		goto out_free_3;

	r = sysdev_register(&kvm_sysdev);
	if (r)
		goto out_free_4;

	/* A kmem cache lets us meet the alignment requirements of fx_save. */
	kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
					   __alignof__(struct kvm_vcpu),
					   0, NULL);
	if (!kvm_vcpu_cache) {
		r = -ENOMEM;
		goto out_free_5;
	}

	kvm_chardev_ops.owner = module;
	kvm_vm_fops.owner = module;
	kvm_vcpu_fops.owner = module;

	r = misc_register(&kvm_dev);
	if (r) {
		printk(KERN_ERR "kvm: misc device register failed\n");
		goto out_free;
	}

	kvm_preempt_ops.sched_in = kvm_sched_in;
	kvm_preempt_ops.sched_out = kvm_sched_out;

	kvm_init_debug();

	return 0;

out_free:
	kmem_cache_destroy(kvm_vcpu_cache);
out_free_5:
	sysdev_unregister(&kvm_sysdev);
out_free_4:
	sysdev_class_unregister(&kvm_sysdev_class);
out_free_3:
	unregister_reboot_notifier(&kvm_reboot_notifier);
	unregister_cpu_notifier(&kvm_cpu_notifier);
out_free_2:
out_free_1:
	kvm_arch_hardware_unsetup();
out_free_0a:
	free_cpumask_var(cpus_hardware_enabled);
out_free_0:
	__free_page(bad_page);
out:
	kvm_arch_exit();
out_fail:
	return r;
}
EXPORT_SYMBOL_GPL(kvm_init);

void kvm_exit(void)
{
	tracepoint_synchronize_unregister();
	kvm_exit_debug();
	misc_deregister(&kvm_dev);
	kmem_cache_destroy(kvm_vcpu_cache);
	sysdev_unregister(&kvm_sysdev);
	sysdev_class_unregister(&kvm_sysdev_class);
	unregister_reboot_notifier(&kvm_reboot_notifier);
	unregister_cpu_notifier(&kvm_cpu_notifier);
	on_each_cpu(hardware_disable, NULL, 1);
	kvm_arch_hardware_unsetup();
	kvm_arch_exit();
	free_cpumask_var(cpus_hardware_enabled);
	__free_page(bad_page);
}
EXPORT_SYMBOL_GPL(kvm_exit);