1 /* 2 * Kernel-based Virtual Machine driver for Linux 3 * 4 * This module enables machines with Intel VT-x extensions to run virtual 5 * machines without emulation or binary translation. 6 * 7 * Copyright (C) 2006 Qumranet, Inc. 8 * 9 * Authors: 10 * Avi Kivity <avi@qumranet.com> 11 * Yaniv Kamay <yaniv@qumranet.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2. See 14 * the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "iodev.h" 19 20 #include <linux/kvm_host.h> 21 #include <linux/kvm.h> 22 #include <linux/module.h> 23 #include <linux/errno.h> 24 #include <linux/percpu.h> 25 #include <linux/gfp.h> 26 #include <linux/mm.h> 27 #include <linux/miscdevice.h> 28 #include <linux/vmalloc.h> 29 #include <linux/reboot.h> 30 #include <linux/debugfs.h> 31 #include <linux/highmem.h> 32 #include <linux/file.h> 33 #include <linux/sysdev.h> 34 #include <linux/cpu.h> 35 #include <linux/sched.h> 36 #include <linux/cpumask.h> 37 #include <linux/smp.h> 38 #include <linux/anon_inodes.h> 39 #include <linux/profile.h> 40 #include <linux/kvm_para.h> 41 #include <linux/pagemap.h> 42 #include <linux/mman.h> 43 #include <linux/swap.h> 44 45 #include <asm/processor.h> 46 #include <asm/io.h> 47 #include <asm/uaccess.h> 48 #include <asm/pgtable.h> 49 50 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 51 #include "coalesced_mmio.h" 52 #endif 53 54 #ifdef KVM_CAP_DEVICE_ASSIGNMENT 55 #include <linux/pci.h> 56 #include <linux/interrupt.h> 57 #include "irq.h" 58 #endif 59 60 MODULE_AUTHOR("Qumranet"); 61 MODULE_LICENSE("GPL"); 62 63 DEFINE_SPINLOCK(kvm_lock); 64 LIST_HEAD(vm_list); 65 66 static cpumask_t cpus_hardware_enabled; 67 68 struct kmem_cache *kvm_vcpu_cache; 69 EXPORT_SYMBOL_GPL(kvm_vcpu_cache); 70 71 static __read_mostly struct preempt_ops kvm_preempt_ops; 72 73 struct dentry *kvm_debugfs_dir; 74 75 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 76 unsigned long arg); 77 78 bool kvm_rebooting; 79 80 #ifdef KVM_CAP_DEVICE_ASSIGNMENT 81 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, 82 int assigned_dev_id) 83 { 84 struct list_head *ptr; 85 struct kvm_assigned_dev_kernel *match; 86 87 list_for_each(ptr, head) { 88 match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); 89 if (match->assigned_dev_id == assigned_dev_id) 90 return match; 91 } 92 return NULL; 93 } 94 95 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) 96 { 97 struct kvm_assigned_dev_kernel *assigned_dev; 98 99 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, 100 interrupt_work); 101 102 /* This is taken to safely inject irq inside the guest. 
When 103 * the interrupt injection (or the ioapic code) uses a 104 * finer-grained lock, update this 105 */ 106 mutex_lock(&assigned_dev->kvm->lock); 107 kvm_set_irq(assigned_dev->kvm, 108 assigned_dev->guest_irq, 1); 109 mutex_unlock(&assigned_dev->kvm->lock); 110 kvm_put_kvm(assigned_dev->kvm); 111 } 112 113 /* FIXME: Implement the OR logic needed to make shared interrupts on 114 * this line behave properly 115 */ 116 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) 117 { 118 struct kvm_assigned_dev_kernel *assigned_dev = 119 (struct kvm_assigned_dev_kernel *) dev_id; 120 121 kvm_get_kvm(assigned_dev->kvm); 122 schedule_work(&assigned_dev->interrupt_work); 123 disable_irq_nosync(irq); 124 return IRQ_HANDLED; 125 } 126 127 /* Ack the irq line for an assigned device */ 128 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) 129 { 130 struct kvm_assigned_dev_kernel *dev; 131 132 if (kian->gsi == -1) 133 return; 134 135 dev = container_of(kian, struct kvm_assigned_dev_kernel, 136 ack_notifier); 137 kvm_set_irq(dev->kvm, dev->guest_irq, 0); 138 enable_irq(dev->host_irq); 139 } 140 141 static void kvm_free_assigned_device(struct kvm *kvm, 142 struct kvm_assigned_dev_kernel 143 *assigned_dev) 144 { 145 if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) 146 free_irq(assigned_dev->host_irq, (void *)assigned_dev); 147 148 kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); 149 150 if (cancel_work_sync(&assigned_dev->interrupt_work)) 151 /* We had pending work. That means we will have to take 152 * care of kvm_put_kvm. 153 */ 154 kvm_put_kvm(kvm); 155 156 pci_release_regions(assigned_dev->dev); 157 pci_disable_device(assigned_dev->dev); 158 pci_dev_put(assigned_dev->dev); 159 160 list_del(&assigned_dev->list); 161 kfree(assigned_dev); 162 } 163 164 void kvm_free_all_assigned_devices(struct kvm *kvm) 165 { 166 struct list_head *ptr, *ptr2; 167 struct kvm_assigned_dev_kernel *assigned_dev; 168 169 list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { 170 assigned_dev = list_entry(ptr, 171 struct kvm_assigned_dev_kernel, 172 list); 173 174 kvm_free_assigned_device(kvm, assigned_dev); 175 } 176 } 177 178 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, 179 struct kvm_assigned_irq 180 *assigned_irq) 181 { 182 int r = 0; 183 struct kvm_assigned_dev_kernel *match; 184 185 mutex_lock(&kvm->lock); 186 187 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, 188 assigned_irq->assigned_dev_id); 189 if (!match) { 190 mutex_unlock(&kvm->lock); 191 return -EINVAL; 192 } 193 194 if (match->irq_requested) { 195 match->guest_irq = assigned_irq->guest_irq; 196 match->ack_notifier.gsi = assigned_irq->guest_irq; 197 mutex_unlock(&kvm->lock); 198 return 0; 199 } 200 201 INIT_WORK(&match->interrupt_work, 202 kvm_assigned_dev_interrupt_work_handler); 203 204 if (irqchip_in_kernel(kvm)) { 205 if (!capable(CAP_SYS_RAWIO)) { 206 r = -EPERM; 207 goto out_release; 208 } 209 210 if (assigned_irq->host_irq) 211 match->host_irq = assigned_irq->host_irq; 212 else 213 match->host_irq = match->dev->irq; 214 match->guest_irq = assigned_irq->guest_irq; 215 match->ack_notifier.gsi = assigned_irq->guest_irq; 216 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; 217 kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); 218 219 /* Even though this is PCI, we don't want to use shared 220 * interrupts. 
Sharing host devices with guest-assigned devices 221 * on the same interrupt line is not a happy situation: there 222 * are going to be long delays in accepting, acking, etc. 223 */ 224 if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, 225 "kvm_assigned_device", (void *)match)) { 226 r = -EIO; 227 goto out_release; 228 } 229 } 230 231 match->irq_requested = true; 232 mutex_unlock(&kvm->lock); 233 return r; 234 out_release: 235 mutex_unlock(&kvm->lock); 236 kvm_free_assigned_device(kvm, match); 237 return r; 238 } 239 240 static int kvm_vm_ioctl_assign_device(struct kvm *kvm, 241 struct kvm_assigned_pci_dev *assigned_dev) 242 { 243 int r = 0; 244 struct kvm_assigned_dev_kernel *match; 245 struct pci_dev *dev; 246 247 mutex_lock(&kvm->lock); 248 249 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, 250 assigned_dev->assigned_dev_id); 251 if (match) { 252 /* device already assigned */ 253 r = -EINVAL; 254 goto out; 255 } 256 257 match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); 258 if (match == NULL) { 259 printk(KERN_INFO "%s: Couldn't allocate memory\n", 260 __func__); 261 r = -ENOMEM; 262 goto out; 263 } 264 dev = pci_get_bus_and_slot(assigned_dev->busnr, 265 assigned_dev->devfn); 266 if (!dev) { 267 printk(KERN_INFO "%s: host device not found\n", __func__); 268 r = -EINVAL; 269 goto out_free; 270 } 271 if (pci_enable_device(dev)) { 272 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); 273 r = -EBUSY; 274 goto out_put; 275 } 276 r = pci_request_regions(dev, "kvm_assigned_device"); 277 if (r) { 278 printk(KERN_INFO "%s: Could not get access to device regions\n", 279 __func__); 280 goto out_disable; 281 } 282 match->assigned_dev_id = assigned_dev->assigned_dev_id; 283 match->host_busnr = assigned_dev->busnr; 284 match->host_devfn = assigned_dev->devfn; 285 match->dev = dev; 286 287 match->kvm = kvm; 288 289 list_add(&match->list, &kvm->arch.assigned_dev_head); 290 291 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { 292 r = kvm_iommu_map_guest(kvm, match); 293 if (r) 294 goto out_list_del; 295 } 296 297 out: 298 mutex_unlock(&kvm->lock); 299 return r; 300 out_list_del: 301 list_del(&match->list); 302 pci_release_regions(dev); 303 out_disable: 304 pci_disable_device(dev); 305 out_put: 306 pci_dev_put(dev); 307 out_free: 308 kfree(match); 309 mutex_unlock(&kvm->lock); 310 return r; 311 } 312 #endif 313 314 static inline int valid_vcpu(int n) 315 { 316 return likely(n >= 0 && n < KVM_MAX_VCPUS); 317 } 318 319 inline int kvm_is_mmio_pfn(pfn_t pfn) 320 { 321 if (pfn_valid(pfn)) 322 return PageReserved(pfn_to_page(pfn)); 323 324 return true; 325 } 326 327 /* 328 * Switches to specified vcpu, until a matching vcpu_put() 329 */ 330 void vcpu_load(struct kvm_vcpu *vcpu) 331 { 332 int cpu; 333 334 mutex_lock(&vcpu->mutex); 335 cpu = get_cpu(); 336 preempt_notifier_register(&vcpu->preempt_notifier); 337 kvm_arch_vcpu_load(vcpu, cpu); 338 put_cpu(); 339 } 340 341 void vcpu_put(struct kvm_vcpu *vcpu) 342 { 343 preempt_disable(); 344 kvm_arch_vcpu_put(vcpu); 345 preempt_notifier_unregister(&vcpu->preempt_notifier); 346 preempt_enable(); 347 mutex_unlock(&vcpu->mutex); 348 } 349 350 static void ack_flush(void *_completed) 351 { 352 } 353 354 void kvm_flush_remote_tlbs(struct kvm *kvm) 355 { 356 int i, cpu, me; 357 cpumask_t cpus; 358 struct kvm_vcpu *vcpu; 359 360 me = get_cpu(); 361 cpus_clear(cpus); 362 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 363 vcpu = kvm->vcpus[i]; 364 if (!vcpu) 365 continue; 366 if (test_and_set_bit(KVM_REQ_TLB_FLUSH, 
&vcpu->requests)) 367 continue; 368 cpu = vcpu->cpu; 369 if (cpu != -1 && cpu != me) 370 cpu_set(cpu, cpus); 371 } 372 if (cpus_empty(cpus)) 373 goto out; 374 ++kvm->stat.remote_tlb_flush; 375 smp_call_function_mask(cpus, ack_flush, NULL, 1); 376 out: 377 put_cpu(); 378 } 379 380 void kvm_reload_remote_mmus(struct kvm *kvm) 381 { 382 int i, cpu, me; 383 cpumask_t cpus; 384 struct kvm_vcpu *vcpu; 385 386 me = get_cpu(); 387 cpus_clear(cpus); 388 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 389 vcpu = kvm->vcpus[i]; 390 if (!vcpu) 391 continue; 392 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 393 continue; 394 cpu = vcpu->cpu; 395 if (cpu != -1 && cpu != me) 396 cpu_set(cpu, cpus); 397 } 398 if (cpus_empty(cpus)) 399 goto out; 400 smp_call_function_mask(cpus, ack_flush, NULL, 1); 401 out: 402 put_cpu(); 403 } 404 405 406 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 407 { 408 struct page *page; 409 int r; 410 411 mutex_init(&vcpu->mutex); 412 vcpu->cpu = -1; 413 vcpu->kvm = kvm; 414 vcpu->vcpu_id = id; 415 init_waitqueue_head(&vcpu->wq); 416 417 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 418 if (!page) { 419 r = -ENOMEM; 420 goto fail; 421 } 422 vcpu->run = page_address(page); 423 424 r = kvm_arch_vcpu_init(vcpu); 425 if (r < 0) 426 goto fail_free_run; 427 return 0; 428 429 fail_free_run: 430 free_page((unsigned long)vcpu->run); 431 fail: 432 return r; 433 } 434 EXPORT_SYMBOL_GPL(kvm_vcpu_init); 435 436 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) 437 { 438 kvm_arch_vcpu_uninit(vcpu); 439 free_page((unsigned long)vcpu->run); 440 } 441 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); 442 443 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 444 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) 445 { 446 return container_of(mn, struct kvm, mmu_notifier); 447 } 448 449 static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, 450 struct mm_struct *mm, 451 unsigned long address) 452 { 453 struct kvm *kvm = mmu_notifier_to_kvm(mn); 454 int need_tlb_flush; 455 456 /* 457 * When ->invalidate_page runs, the linux pte has been zapped 458 * already but the page is still allocated until 459 * ->invalidate_page returns. So if we increase the sequence 460 * here the kvm page fault will notice if the spte can't be 461 * established because the page is going to be freed. If 462 * instead the kvm page fault establishes the spte before 463 * ->invalidate_page runs, kvm_unmap_hva will release it 464 * before returning. 465 * 466 * The sequence increase only need to be seen at spin_unlock 467 * time, and not at spin_lock time. 468 * 469 * Increasing the sequence after the spin_unlock would be 470 * unsafe because the kvm page fault could then establish the 471 * pte after kvm_unmap_hva returned, without noticing the page 472 * is going to be freed. 
 */
	spin_lock(&kvm->mmu_lock);
	kvm->mmu_notifier_seq++;
	need_tlb_flush = kvm_unmap_hva(kvm, address);
	spin_unlock(&kvm->mmu_lock);

	/* we have to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);

}

static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
						    struct mm_struct *mm,
						    unsigned long start,
						    unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int need_tlb_flush = 0;

	spin_lock(&kvm->mmu_lock);
	/*
	 * The count increase must become visible at unlock time as no
	 * spte can be established without taking the mmu_lock and
	 * count is also read inside the mmu_lock critical section.
	 */
	kvm->mmu_notifier_count++;
	for (; start < end; start += PAGE_SIZE)
		need_tlb_flush |= kvm_unmap_hva(kvm, start);
	spin_unlock(&kvm->mmu_lock);

	/* we have to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);
}

static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
						  struct mm_struct *mm,
						  unsigned long start,
						  unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);

	spin_lock(&kvm->mmu_lock);
	/*
	 * This sequence increase will notify the kvm page fault that
	 * the page that is going to be mapped in the spte could have
	 * been freed.
	 */
	kvm->mmu_notifier_seq++;
	/*
	 * The above sequence increase must be visible before the
	 * below count decrease, but both values are read by the kvm
	 * page fault under the mmu_lock spinlock, so we don't need to
	 * add a smp_wmb() here in between the two.
	 */
	kvm->mmu_notifier_count--;
	spin_unlock(&kvm->mmu_lock);

	BUG_ON(kvm->mmu_notifier_count < 0);
}

static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
					      struct mm_struct *mm,
					      unsigned long address)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int young;

	spin_lock(&kvm->mmu_lock);
	young = kvm_age_hva(kvm, address);
	spin_unlock(&kvm->mmu_lock);

	if (young)
		kvm_flush_remote_tlbs(kvm);

	return young;
}

static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */

static struct kvm *kvm_create_vm(void)
{
	struct kvm *kvm = kvm_arch_create_vm();
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	struct page *page;
#endif

	if (IS_ERR(kvm))
		goto out;

#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		kfree(kvm);
		return ERR_PTR(-ENOMEM);
	}
	kvm->coalesced_mmio_ring =
			(struct kvm_coalesced_mmio_ring *)page_address(page);
#endif

#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
	{
		int err;
		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
		if (err) {
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
			put_page(page);
#endif
			kfree(kvm);
			return ERR_PTR(err);
		}
	}
#endif

	kvm->mm = current->mm;
	atomic_inc(&kvm->mm->mm_count);
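	/*
	 * The rest of the initialization wires up the VM's locks and I/O
	 * buses and publishes it on the global vm_list under kvm_lock.
	 */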
spin_lock_init(&kvm->mmu_lock); 598 kvm_io_bus_init(&kvm->pio_bus); 599 mutex_init(&kvm->lock); 600 kvm_io_bus_init(&kvm->mmio_bus); 601 init_rwsem(&kvm->slots_lock); 602 atomic_set(&kvm->users_count, 1); 603 spin_lock(&kvm_lock); 604 list_add(&kvm->vm_list, &vm_list); 605 spin_unlock(&kvm_lock); 606 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 607 kvm_coalesced_mmio_init(kvm); 608 #endif 609 out: 610 return kvm; 611 } 612 613 /* 614 * Free any memory in @free but not in @dont. 615 */ 616 static void kvm_free_physmem_slot(struct kvm_memory_slot *free, 617 struct kvm_memory_slot *dont) 618 { 619 if (!dont || free->rmap != dont->rmap) 620 vfree(free->rmap); 621 622 if (!dont || free->dirty_bitmap != dont->dirty_bitmap) 623 vfree(free->dirty_bitmap); 624 625 if (!dont || free->lpage_info != dont->lpage_info) 626 vfree(free->lpage_info); 627 628 free->npages = 0; 629 free->dirty_bitmap = NULL; 630 free->rmap = NULL; 631 free->lpage_info = NULL; 632 } 633 634 void kvm_free_physmem(struct kvm *kvm) 635 { 636 int i; 637 638 for (i = 0; i < kvm->nmemslots; ++i) 639 kvm_free_physmem_slot(&kvm->memslots[i], NULL); 640 } 641 642 static void kvm_destroy_vm(struct kvm *kvm) 643 { 644 struct mm_struct *mm = kvm->mm; 645 646 spin_lock(&kvm_lock); 647 list_del(&kvm->vm_list); 648 spin_unlock(&kvm_lock); 649 kvm_io_bus_destroy(&kvm->pio_bus); 650 kvm_io_bus_destroy(&kvm->mmio_bus); 651 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 652 if (kvm->coalesced_mmio_ring != NULL) 653 free_page((unsigned long)kvm->coalesced_mmio_ring); 654 #endif 655 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 656 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); 657 #endif 658 kvm_arch_destroy_vm(kvm); 659 mmdrop(mm); 660 } 661 662 void kvm_get_kvm(struct kvm *kvm) 663 { 664 atomic_inc(&kvm->users_count); 665 } 666 EXPORT_SYMBOL_GPL(kvm_get_kvm); 667 668 void kvm_put_kvm(struct kvm *kvm) 669 { 670 if (atomic_dec_and_test(&kvm->users_count)) 671 kvm_destroy_vm(kvm); 672 } 673 EXPORT_SYMBOL_GPL(kvm_put_kvm); 674 675 676 static int kvm_vm_release(struct inode *inode, struct file *filp) 677 { 678 struct kvm *kvm = filp->private_data; 679 680 kvm_put_kvm(kvm); 681 return 0; 682 } 683 684 /* 685 * Allocate some memory and give it an address in the guest physical address 686 * space. 687 * 688 * Discontiguous memory is allowed, mostly for framebuffers. 689 * 690 * Must be called holding mmap_sem for write. 691 */ 692 int __kvm_set_memory_region(struct kvm *kvm, 693 struct kvm_userspace_memory_region *mem, 694 int user_alloc) 695 { 696 int r; 697 gfn_t base_gfn; 698 unsigned long npages; 699 unsigned long i; 700 struct kvm_memory_slot *memslot; 701 struct kvm_memory_slot old, new; 702 703 r = -EINVAL; 704 /* General sanity checks */ 705 if (mem->memory_size & (PAGE_SIZE - 1)) 706 goto out; 707 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) 708 goto out; 709 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 710 goto out; 711 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) 712 goto out; 713 714 memslot = &kvm->memslots[mem->slot]; 715 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 716 npages = mem->memory_size >> PAGE_SHIFT; 717 718 if (!npages) 719 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; 720 721 new = old = *memslot; 722 723 new.base_gfn = base_gfn; 724 new.npages = npages; 725 new.flags = mem->flags; 726 727 /* Disallow changing a memory slot's size. 
 */
	r = -EINVAL;
	if (npages && old.npages && npages != old.npages)
		goto out_free;

	/* Check for overlaps */
	r = -EEXIST;
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *s = &kvm->memslots[i];

		if (s == memslot)
			continue;
		if (!((base_gfn + npages <= s->base_gfn) ||
		      (base_gfn >= s->base_gfn + s->npages)))
			goto out_free;
	}

	/* Free page dirty bitmap if unneeded */
	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
		new.dirty_bitmap = NULL;

	r = -ENOMEM;

	/* Allocate if a slot is being created */
#ifndef CONFIG_S390
	if (npages && !new.rmap) {
		new.rmap = vmalloc(npages * sizeof(struct page *));

		if (!new.rmap)
			goto out_free;

		memset(new.rmap, 0, npages * sizeof(*new.rmap));

		new.user_alloc = user_alloc;
		/*
		 * hva_to_rmmap() serializes with the mmu_lock and to be
		 * safe it has to ignore memslots with !user_alloc &&
		 * !userspace_addr.
		 */
		if (user_alloc)
			new.userspace_addr = mem->userspace_addr;
		else
			new.userspace_addr = 0;
	}
	if (npages && !new.lpage_info) {
		int largepages = npages / KVM_PAGES_PER_HPAGE;
		if (npages % KVM_PAGES_PER_HPAGE)
			largepages++;
		if (base_gfn % KVM_PAGES_PER_HPAGE)
			largepages++;

		new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));

		if (!new.lpage_info)
			goto out_free;

		memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));

		if (base_gfn % KVM_PAGES_PER_HPAGE)
			new.lpage_info[0].write_count = 1;
		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
			new.lpage_info[largepages-1].write_count = 1;
	}

	/* Allocate page dirty bitmap if needed */
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;

		new.dirty_bitmap = vmalloc(dirty_bytes);
		if (!new.dirty_bitmap)
			goto out_free;
		memset(new.dirty_bitmap, 0, dirty_bytes);
	}
#endif /* not defined CONFIG_S390 */

	if (!npages)
		kvm_arch_flush_shadow(kvm);

	spin_lock(&kvm->mmu_lock);
	if (mem->slot >= kvm->nmemslots)
		kvm->nmemslots = mem->slot + 1;

	*memslot = new;
	spin_unlock(&kvm->mmu_lock);

	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
	if (r) {
		spin_lock(&kvm->mmu_lock);
		*memslot = old;
		spin_unlock(&kvm->mmu_lock);
		goto out_free;
	}

	kvm_free_physmem_slot(&old, &new);
#ifdef CONFIG_DMAR
	/* map the pages in iommu page table */
	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
	if (r)
		goto out;
#endif
	return 0;

out_free:
	kvm_free_physmem_slot(&new, &old);
out:
	return r;

}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);

int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc)
{
	int r;

	down_write(&kvm->slots_lock);
	r = __kvm_set_memory_region(kvm, mem, user_alloc);
	up_write(&kvm->slots_lock);
	return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);

int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct
				   kvm_userspace_memory_region *mem,
				   int user_alloc)
{
	if (mem->slot >= KVM_MEMORY_SLOTS)
		return -EINVAL;
	return kvm_set_memory_region(kvm, mem, user_alloc);
}

int kvm_get_dirty_log(struct kvm *kvm,
		      struct kvm_dirty_log *log, int *is_dirty)
{
	struct kvm_memory_slot *memslot;
	int r, i;
	int n;
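	/* any becomes non-zero if at least one page in the slot is dirty */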
	unsigned long any = 0;

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
		goto out;

	memslot = &kvm->memslots[log->slot];
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;

	for (i = 0; !any && i < n/sizeof(long); ++i)
		any = memslot->dirty_bitmap[i];

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	if (any)
		*is_dirty = 1;

	r = 0;
out:
	return r;
}

int is_error_page(struct page *page)
{
	return page == bad_page;
}
EXPORT_SYMBOL_GPL(is_error_page);

int is_error_pfn(pfn_t pfn)
{
	return pfn == bad_pfn;
}
EXPORT_SYMBOL_GPL(is_error_pfn);

static inline unsigned long bad_hva(void)
{
	return PAGE_OFFSET;
}

int kvm_is_error_hva(unsigned long addr)
{
	return addr == bad_hva();
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);

static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return memslot;
	}
	return NULL;
}

struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	gfn = unalias_gfn(kvm, gfn);
	return __gfn_to_memslot(kvm, gfn);
}

int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;

	gfn = unalias_gfn(kvm, gfn);
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;

	gfn = unalias_gfn(kvm, gfn);
	slot = __gfn_to_memslot(kvm, gfn);
	if (!slot)
		return bad_hva();
	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);

pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
	struct page *page[1];
	unsigned long addr;
	int npages;
	pfn_t pfn;

	might_sleep();

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr)) {
		get_page(bad_page);
		return page_to_pfn(bad_page);
	}

	npages = get_user_pages_fast(addr, 1, 1, page);

	if (unlikely(npages != 1)) {
		struct vm_area_struct *vma;

		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, addr);

		if (vma == NULL || addr < vma->vm_start ||
		    !(vma->vm_flags & VM_PFNMAP)) {
			up_read(&current->mm->mmap_sem);
			get_page(bad_page);
			return page_to_pfn(bad_page);
		}

		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
		up_read(&current->mm->mmap_sem);
		BUG_ON(!kvm_is_mmio_pfn(pfn));
	} else
		pfn = page_to_pfn(page[0]);

	return pfn;
}

EXPORT_SYMBOL_GPL(gfn_to_pfn);

struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	pfn_t pfn;

	pfn = gfn_to_pfn(kvm, gfn);
	if (!kvm_is_mmio_pfn(pfn))
		return pfn_to_page(pfn);

	WARN_ON(kvm_is_mmio_pfn(pfn));

	get_page(bad_page);
	return bad_page;
}

EXPORT_SYMBOL_GPL(gfn_to_page);

void kvm_release_page_clean(struct page *page)
{
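	/* drop the reference that gfn_to_page() took, without marking the page dirty */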
kvm_release_pfn_clean(page_to_pfn(page)); 1025 } 1026 EXPORT_SYMBOL_GPL(kvm_release_page_clean); 1027 1028 void kvm_release_pfn_clean(pfn_t pfn) 1029 { 1030 if (!kvm_is_mmio_pfn(pfn)) 1031 put_page(pfn_to_page(pfn)); 1032 } 1033 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); 1034 1035 void kvm_release_page_dirty(struct page *page) 1036 { 1037 kvm_release_pfn_dirty(page_to_pfn(page)); 1038 } 1039 EXPORT_SYMBOL_GPL(kvm_release_page_dirty); 1040 1041 void kvm_release_pfn_dirty(pfn_t pfn) 1042 { 1043 kvm_set_pfn_dirty(pfn); 1044 kvm_release_pfn_clean(pfn); 1045 } 1046 EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); 1047 1048 void kvm_set_page_dirty(struct page *page) 1049 { 1050 kvm_set_pfn_dirty(page_to_pfn(page)); 1051 } 1052 EXPORT_SYMBOL_GPL(kvm_set_page_dirty); 1053 1054 void kvm_set_pfn_dirty(pfn_t pfn) 1055 { 1056 if (!kvm_is_mmio_pfn(pfn)) { 1057 struct page *page = pfn_to_page(pfn); 1058 if (!PageReserved(page)) 1059 SetPageDirty(page); 1060 } 1061 } 1062 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); 1063 1064 void kvm_set_pfn_accessed(pfn_t pfn) 1065 { 1066 if (!kvm_is_mmio_pfn(pfn)) 1067 mark_page_accessed(pfn_to_page(pfn)); 1068 } 1069 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); 1070 1071 void kvm_get_pfn(pfn_t pfn) 1072 { 1073 if (!kvm_is_mmio_pfn(pfn)) 1074 get_page(pfn_to_page(pfn)); 1075 } 1076 EXPORT_SYMBOL_GPL(kvm_get_pfn); 1077 1078 static int next_segment(unsigned long len, int offset) 1079 { 1080 if (len > PAGE_SIZE - offset) 1081 return PAGE_SIZE - offset; 1082 else 1083 return len; 1084 } 1085 1086 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, 1087 int len) 1088 { 1089 int r; 1090 unsigned long addr; 1091 1092 addr = gfn_to_hva(kvm, gfn); 1093 if (kvm_is_error_hva(addr)) 1094 return -EFAULT; 1095 r = copy_from_user(data, (void __user *)addr + offset, len); 1096 if (r) 1097 return -EFAULT; 1098 return 0; 1099 } 1100 EXPORT_SYMBOL_GPL(kvm_read_guest_page); 1101 1102 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) 1103 { 1104 gfn_t gfn = gpa >> PAGE_SHIFT; 1105 int seg; 1106 int offset = offset_in_page(gpa); 1107 int ret; 1108 1109 while ((seg = next_segment(len, offset)) != 0) { 1110 ret = kvm_read_guest_page(kvm, gfn, data, offset, seg); 1111 if (ret < 0) 1112 return ret; 1113 offset = 0; 1114 len -= seg; 1115 data += seg; 1116 ++gfn; 1117 } 1118 return 0; 1119 } 1120 EXPORT_SYMBOL_GPL(kvm_read_guest); 1121 1122 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, 1123 unsigned long len) 1124 { 1125 int r; 1126 unsigned long addr; 1127 gfn_t gfn = gpa >> PAGE_SHIFT; 1128 int offset = offset_in_page(gpa); 1129 1130 addr = gfn_to_hva(kvm, gfn); 1131 if (kvm_is_error_hva(addr)) 1132 return -EFAULT; 1133 pagefault_disable(); 1134 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); 1135 pagefault_enable(); 1136 if (r) 1137 return -EFAULT; 1138 return 0; 1139 } 1140 EXPORT_SYMBOL(kvm_read_guest_atomic); 1141 1142 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, 1143 int offset, int len) 1144 { 1145 int r; 1146 unsigned long addr; 1147 1148 addr = gfn_to_hva(kvm, gfn); 1149 if (kvm_is_error_hva(addr)) 1150 return -EFAULT; 1151 r = copy_to_user((void __user *)addr + offset, data, len); 1152 if (r) 1153 return -EFAULT; 1154 mark_page_dirty(kvm, gfn); 1155 return 0; 1156 } 1157 EXPORT_SYMBOL_GPL(kvm_write_guest_page); 1158 1159 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, 1160 unsigned long len) 1161 { 1162 gfn_t gfn = gpa >> PAGE_SHIFT; 1163 int seg; 1164 int offset = 
offset_in_page(gpa); 1165 int ret; 1166 1167 while ((seg = next_segment(len, offset)) != 0) { 1168 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); 1169 if (ret < 0) 1170 return ret; 1171 offset = 0; 1172 len -= seg; 1173 data += seg; 1174 ++gfn; 1175 } 1176 return 0; 1177 } 1178 1179 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) 1180 { 1181 return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len); 1182 } 1183 EXPORT_SYMBOL_GPL(kvm_clear_guest_page); 1184 1185 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) 1186 { 1187 gfn_t gfn = gpa >> PAGE_SHIFT; 1188 int seg; 1189 int offset = offset_in_page(gpa); 1190 int ret; 1191 1192 while ((seg = next_segment(len, offset)) != 0) { 1193 ret = kvm_clear_guest_page(kvm, gfn, offset, seg); 1194 if (ret < 0) 1195 return ret; 1196 offset = 0; 1197 len -= seg; 1198 ++gfn; 1199 } 1200 return 0; 1201 } 1202 EXPORT_SYMBOL_GPL(kvm_clear_guest); 1203 1204 void mark_page_dirty(struct kvm *kvm, gfn_t gfn) 1205 { 1206 struct kvm_memory_slot *memslot; 1207 1208 gfn = unalias_gfn(kvm, gfn); 1209 memslot = __gfn_to_memslot(kvm, gfn); 1210 if (memslot && memslot->dirty_bitmap) { 1211 unsigned long rel_gfn = gfn - memslot->base_gfn; 1212 1213 /* avoid RMW */ 1214 if (!test_bit(rel_gfn, memslot->dirty_bitmap)) 1215 set_bit(rel_gfn, memslot->dirty_bitmap); 1216 } 1217 } 1218 1219 /* 1220 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 1221 */ 1222 void kvm_vcpu_block(struct kvm_vcpu *vcpu) 1223 { 1224 DEFINE_WAIT(wait); 1225 1226 for (;;) { 1227 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1228 1229 if (kvm_cpu_has_interrupt(vcpu) || 1230 kvm_cpu_has_pending_timer(vcpu) || 1231 kvm_arch_vcpu_runnable(vcpu)) { 1232 set_bit(KVM_REQ_UNHALT, &vcpu->requests); 1233 break; 1234 } 1235 if (signal_pending(current)) 1236 break; 1237 1238 vcpu_put(vcpu); 1239 schedule(); 1240 vcpu_load(vcpu); 1241 } 1242 1243 finish_wait(&vcpu->wq, &wait); 1244 } 1245 1246 void kvm_resched(struct kvm_vcpu *vcpu) 1247 { 1248 if (!need_resched()) 1249 return; 1250 cond_resched(); 1251 } 1252 EXPORT_SYMBOL_GPL(kvm_resched); 1253 1254 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1255 { 1256 struct kvm_vcpu *vcpu = vma->vm_file->private_data; 1257 struct page *page; 1258 1259 if (vmf->pgoff == 0) 1260 page = virt_to_page(vcpu->run); 1261 #ifdef CONFIG_X86 1262 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) 1263 page = virt_to_page(vcpu->arch.pio_data); 1264 #endif 1265 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1266 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) 1267 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); 1268 #endif 1269 else 1270 return VM_FAULT_SIGBUS; 1271 get_page(page); 1272 vmf->page = page; 1273 return 0; 1274 } 1275 1276 static struct vm_operations_struct kvm_vcpu_vm_ops = { 1277 .fault = kvm_vcpu_fault, 1278 }; 1279 1280 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) 1281 { 1282 vma->vm_ops = &kvm_vcpu_vm_ops; 1283 return 0; 1284 } 1285 1286 static int kvm_vcpu_release(struct inode *inode, struct file *filp) 1287 { 1288 struct kvm_vcpu *vcpu = filp->private_data; 1289 1290 kvm_put_kvm(vcpu->kvm); 1291 return 0; 1292 } 1293 1294 static const struct file_operations kvm_vcpu_fops = { 1295 .release = kvm_vcpu_release, 1296 .unlocked_ioctl = kvm_vcpu_ioctl, 1297 .compat_ioctl = kvm_vcpu_ioctl, 1298 .mmap = kvm_vcpu_mmap, 1299 }; 1300 1301 /* 1302 * Allocates an inode for the vcpu. 
1303 */ 1304 static int create_vcpu_fd(struct kvm_vcpu *vcpu) 1305 { 1306 int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); 1307 if (fd < 0) 1308 kvm_put_kvm(vcpu->kvm); 1309 return fd; 1310 } 1311 1312 /* 1313 * Creates some virtual cpus. Good luck creating more than one. 1314 */ 1315 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) 1316 { 1317 int r; 1318 struct kvm_vcpu *vcpu; 1319 1320 if (!valid_vcpu(n)) 1321 return -EINVAL; 1322 1323 vcpu = kvm_arch_vcpu_create(kvm, n); 1324 if (IS_ERR(vcpu)) 1325 return PTR_ERR(vcpu); 1326 1327 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); 1328 1329 r = kvm_arch_vcpu_setup(vcpu); 1330 if (r) 1331 return r; 1332 1333 mutex_lock(&kvm->lock); 1334 if (kvm->vcpus[n]) { 1335 r = -EEXIST; 1336 goto vcpu_destroy; 1337 } 1338 kvm->vcpus[n] = vcpu; 1339 mutex_unlock(&kvm->lock); 1340 1341 /* Now it's all set up, let userspace reach it */ 1342 kvm_get_kvm(kvm); 1343 r = create_vcpu_fd(vcpu); 1344 if (r < 0) 1345 goto unlink; 1346 return r; 1347 1348 unlink: 1349 mutex_lock(&kvm->lock); 1350 kvm->vcpus[n] = NULL; 1351 vcpu_destroy: 1352 mutex_unlock(&kvm->lock); 1353 kvm_arch_vcpu_destroy(vcpu); 1354 return r; 1355 } 1356 1357 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) 1358 { 1359 if (sigset) { 1360 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1361 vcpu->sigset_active = 1; 1362 vcpu->sigset = *sigset; 1363 } else 1364 vcpu->sigset_active = 0; 1365 return 0; 1366 } 1367 1368 static long kvm_vcpu_ioctl(struct file *filp, 1369 unsigned int ioctl, unsigned long arg) 1370 { 1371 struct kvm_vcpu *vcpu = filp->private_data; 1372 void __user *argp = (void __user *)arg; 1373 int r; 1374 struct kvm_fpu *fpu = NULL; 1375 struct kvm_sregs *kvm_sregs = NULL; 1376 1377 if (vcpu->kvm->mm != current->mm) 1378 return -EIO; 1379 switch (ioctl) { 1380 case KVM_RUN: 1381 r = -EINVAL; 1382 if (arg) 1383 goto out; 1384 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 1385 break; 1386 case KVM_GET_REGS: { 1387 struct kvm_regs *kvm_regs; 1388 1389 r = -ENOMEM; 1390 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1391 if (!kvm_regs) 1392 goto out; 1393 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); 1394 if (r) 1395 goto out_free1; 1396 r = -EFAULT; 1397 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) 1398 goto out_free1; 1399 r = 0; 1400 out_free1: 1401 kfree(kvm_regs); 1402 break; 1403 } 1404 case KVM_SET_REGS: { 1405 struct kvm_regs *kvm_regs; 1406 1407 r = -ENOMEM; 1408 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1409 if (!kvm_regs) 1410 goto out; 1411 r = -EFAULT; 1412 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) 1413 goto out_free2; 1414 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); 1415 if (r) 1416 goto out_free2; 1417 r = 0; 1418 out_free2: 1419 kfree(kvm_regs); 1420 break; 1421 } 1422 case KVM_GET_SREGS: { 1423 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1424 r = -ENOMEM; 1425 if (!kvm_sregs) 1426 goto out; 1427 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); 1428 if (r) 1429 goto out; 1430 r = -EFAULT; 1431 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) 1432 goto out; 1433 r = 0; 1434 break; 1435 } 1436 case KVM_SET_SREGS: { 1437 kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1438 r = -ENOMEM; 1439 if (!kvm_sregs) 1440 goto out; 1441 r = -EFAULT; 1442 if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) 1443 goto out; 1444 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); 
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_GET_MP_STATE: {
		struct kvm_mp_state mp_state;

		r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &mp_state, sizeof mp_state))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_MP_STATE: {
		struct kvm_mp_state mp_state;

		r = -EFAULT;
		if (copy_from_user(&mp_state, argp, sizeof mp_state))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_TRANSLATE: {
		struct kvm_translation tr;

		r = -EFAULT;
		if (copy_from_user(&tr, argp, sizeof tr))
			goto out;
		r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &tr, sizeof tr))
			goto out;
		r = 0;
		break;
	}
	case KVM_DEBUG_GUEST: {
		struct kvm_debug_guest dbg;

		r = -EFAULT;
		if (copy_from_user(&dbg, argp, sizeof dbg))
			goto out;
		r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_SIGNAL_MASK: {
		struct kvm_signal_mask __user *sigmask_arg = argp;
		struct kvm_signal_mask kvm_sigmask;
		sigset_t sigset, *p;

		p = NULL;
		if (argp) {
			r = -EFAULT;
			if (copy_from_user(&kvm_sigmask, argp,
					   sizeof kvm_sigmask))
				goto out;
			r = -EINVAL;
			if (kvm_sigmask.len != sizeof sigset)
				goto out;
			r = -EFAULT;
			if (copy_from_user(&sigset, sigmask_arg->sigset,
					   sizeof sigset))
				goto out;
			p = &sigset;
		}
		r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
		break;
	}
	case KVM_GET_FPU: {
		fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
		r = -ENOMEM;
		if (!fpu)
			goto out;
		r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_FPU: {
		fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
		r = -ENOMEM;
		if (!fpu)
			goto out;
		r = -EFAULT;
		if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
		if (r)
			goto out;
		r = 0;
		break;
	}
	default:
		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
	}
out:
	kfree(fpu);
	kfree(kvm_sregs);
	return r;
}

static long kvm_vm_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	if (kvm->mm != current->mm)
		return -EIO;
	switch (ioctl) {
	case KVM_CREATE_VCPU:
		r = kvm_vm_ioctl_create_vcpu(kvm, arg);
		if (r < 0)
			goto out;
		break;
	case KVM_SET_USER_MEMORY_REGION: {
		struct kvm_userspace_memory_region kvm_userspace_mem;

		r = -EFAULT;
		if (copy_from_user(&kvm_userspace_mem, argp,
				   sizeof kvm_userspace_mem))
			goto out;

		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
		if (r)
			goto out;
		break;
	}
	case KVM_GET_DIRTY_LOG: {
		struct kvm_dirty_log log;

		r = -EFAULT;
		if (copy_from_user(&log, argp, sizeof log))
			goto out;
		r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
		if (r)
			goto out;
		break;
1599 } 1600 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1601 case KVM_REGISTER_COALESCED_MMIO: { 1602 struct kvm_coalesced_mmio_zone zone; 1603 r = -EFAULT; 1604 if (copy_from_user(&zone, argp, sizeof zone)) 1605 goto out; 1606 r = -ENXIO; 1607 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 1608 if (r) 1609 goto out; 1610 r = 0; 1611 break; 1612 } 1613 case KVM_UNREGISTER_COALESCED_MMIO: { 1614 struct kvm_coalesced_mmio_zone zone; 1615 r = -EFAULT; 1616 if (copy_from_user(&zone, argp, sizeof zone)) 1617 goto out; 1618 r = -ENXIO; 1619 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 1620 if (r) 1621 goto out; 1622 r = 0; 1623 break; 1624 } 1625 #endif 1626 #ifdef KVM_CAP_DEVICE_ASSIGNMENT 1627 case KVM_ASSIGN_PCI_DEVICE: { 1628 struct kvm_assigned_pci_dev assigned_dev; 1629 1630 r = -EFAULT; 1631 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) 1632 goto out; 1633 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); 1634 if (r) 1635 goto out; 1636 break; 1637 } 1638 case KVM_ASSIGN_IRQ: { 1639 struct kvm_assigned_irq assigned_irq; 1640 1641 r = -EFAULT; 1642 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) 1643 goto out; 1644 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); 1645 if (r) 1646 goto out; 1647 break; 1648 } 1649 #endif 1650 default: 1651 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 1652 } 1653 out: 1654 return r; 1655 } 1656 1657 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1658 { 1659 struct page *page[1]; 1660 unsigned long addr; 1661 int npages; 1662 gfn_t gfn = vmf->pgoff; 1663 struct kvm *kvm = vma->vm_file->private_data; 1664 1665 addr = gfn_to_hva(kvm, gfn); 1666 if (kvm_is_error_hva(addr)) 1667 return VM_FAULT_SIGBUS; 1668 1669 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, 1670 NULL); 1671 if (unlikely(npages != 1)) 1672 return VM_FAULT_SIGBUS; 1673 1674 vmf->page = page[0]; 1675 return 0; 1676 } 1677 1678 static struct vm_operations_struct kvm_vm_vm_ops = { 1679 .fault = kvm_vm_fault, 1680 }; 1681 1682 static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) 1683 { 1684 vma->vm_ops = &kvm_vm_vm_ops; 1685 return 0; 1686 } 1687 1688 static const struct file_operations kvm_vm_fops = { 1689 .release = kvm_vm_release, 1690 .unlocked_ioctl = kvm_vm_ioctl, 1691 .compat_ioctl = kvm_vm_ioctl, 1692 .mmap = kvm_vm_mmap, 1693 }; 1694 1695 static int kvm_dev_ioctl_create_vm(void) 1696 { 1697 int fd; 1698 struct kvm *kvm; 1699 1700 kvm = kvm_create_vm(); 1701 if (IS_ERR(kvm)) 1702 return PTR_ERR(kvm); 1703 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0); 1704 if (fd < 0) 1705 kvm_put_kvm(kvm); 1706 1707 return fd; 1708 } 1709 1710 static long kvm_dev_ioctl(struct file *filp, 1711 unsigned int ioctl, unsigned long arg) 1712 { 1713 long r = -EINVAL; 1714 1715 switch (ioctl) { 1716 case KVM_GET_API_VERSION: 1717 r = -EINVAL; 1718 if (arg) 1719 goto out; 1720 r = KVM_API_VERSION; 1721 break; 1722 case KVM_CREATE_VM: 1723 r = -EINVAL; 1724 if (arg) 1725 goto out; 1726 r = kvm_dev_ioctl_create_vm(); 1727 break; 1728 case KVM_CHECK_EXTENSION: 1729 r = kvm_dev_ioctl_check_extension(arg); 1730 break; 1731 case KVM_GET_VCPU_MMAP_SIZE: 1732 r = -EINVAL; 1733 if (arg) 1734 goto out; 1735 r = PAGE_SIZE; /* struct kvm_run */ 1736 #ifdef CONFIG_X86 1737 r += PAGE_SIZE; /* pio data page */ 1738 #endif 1739 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1740 r += PAGE_SIZE; /* coalesced mmio ring page */ 1741 #endif 1742 break; 1743 case KVM_TRACE_ENABLE: 1744 case KVM_TRACE_PAUSE: 1745 case KVM_TRACE_DISABLE: 
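		/* all three trace ioctls share the same handler */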
1746 r = kvm_trace_ioctl(ioctl, arg); 1747 break; 1748 default: 1749 return kvm_arch_dev_ioctl(filp, ioctl, arg); 1750 } 1751 out: 1752 return r; 1753 } 1754 1755 static struct file_operations kvm_chardev_ops = { 1756 .unlocked_ioctl = kvm_dev_ioctl, 1757 .compat_ioctl = kvm_dev_ioctl, 1758 }; 1759 1760 static struct miscdevice kvm_dev = { 1761 KVM_MINOR, 1762 "kvm", 1763 &kvm_chardev_ops, 1764 }; 1765 1766 static void hardware_enable(void *junk) 1767 { 1768 int cpu = raw_smp_processor_id(); 1769 1770 if (cpu_isset(cpu, cpus_hardware_enabled)) 1771 return; 1772 cpu_set(cpu, cpus_hardware_enabled); 1773 kvm_arch_hardware_enable(NULL); 1774 } 1775 1776 static void hardware_disable(void *junk) 1777 { 1778 int cpu = raw_smp_processor_id(); 1779 1780 if (!cpu_isset(cpu, cpus_hardware_enabled)) 1781 return; 1782 cpu_clear(cpu, cpus_hardware_enabled); 1783 kvm_arch_hardware_disable(NULL); 1784 } 1785 1786 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, 1787 void *v) 1788 { 1789 int cpu = (long)v; 1790 1791 val &= ~CPU_TASKS_FROZEN; 1792 switch (val) { 1793 case CPU_DYING: 1794 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1795 cpu); 1796 hardware_disable(NULL); 1797 break; 1798 case CPU_UP_CANCELED: 1799 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1800 cpu); 1801 smp_call_function_single(cpu, hardware_disable, NULL, 1); 1802 break; 1803 case CPU_ONLINE: 1804 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 1805 cpu); 1806 smp_call_function_single(cpu, hardware_enable, NULL, 1); 1807 break; 1808 } 1809 return NOTIFY_OK; 1810 } 1811 1812 1813 asmlinkage void kvm_handle_fault_on_reboot(void) 1814 { 1815 if (kvm_rebooting) 1816 /* spin while reset goes on */ 1817 while (true) 1818 ; 1819 /* Fault while not rebooting. We want the trace. */ 1820 BUG(); 1821 } 1822 EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); 1823 1824 static int kvm_reboot(struct notifier_block *notifier, unsigned long val, 1825 void *v) 1826 { 1827 if (val == SYS_RESTART) { 1828 /* 1829 * Some (well, at least mine) BIOSes hang on reboot if 1830 * in vmx root mode. 
1831 */ 1832 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 1833 kvm_rebooting = true; 1834 on_each_cpu(hardware_disable, NULL, 1); 1835 } 1836 return NOTIFY_OK; 1837 } 1838 1839 static struct notifier_block kvm_reboot_notifier = { 1840 .notifier_call = kvm_reboot, 1841 .priority = 0, 1842 }; 1843 1844 void kvm_io_bus_init(struct kvm_io_bus *bus) 1845 { 1846 memset(bus, 0, sizeof(*bus)); 1847 } 1848 1849 void kvm_io_bus_destroy(struct kvm_io_bus *bus) 1850 { 1851 int i; 1852 1853 for (i = 0; i < bus->dev_count; i++) { 1854 struct kvm_io_device *pos = bus->devs[i]; 1855 1856 kvm_iodevice_destructor(pos); 1857 } 1858 } 1859 1860 struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, 1861 gpa_t addr, int len, int is_write) 1862 { 1863 int i; 1864 1865 for (i = 0; i < bus->dev_count; i++) { 1866 struct kvm_io_device *pos = bus->devs[i]; 1867 1868 if (pos->in_range(pos, addr, len, is_write)) 1869 return pos; 1870 } 1871 1872 return NULL; 1873 } 1874 1875 void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) 1876 { 1877 BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); 1878 1879 bus->devs[bus->dev_count++] = dev; 1880 } 1881 1882 static struct notifier_block kvm_cpu_notifier = { 1883 .notifier_call = kvm_cpu_hotplug, 1884 .priority = 20, /* must be > scheduler priority */ 1885 }; 1886 1887 static int vm_stat_get(void *_offset, u64 *val) 1888 { 1889 unsigned offset = (long)_offset; 1890 struct kvm *kvm; 1891 1892 *val = 0; 1893 spin_lock(&kvm_lock); 1894 list_for_each_entry(kvm, &vm_list, vm_list) 1895 *val += *(u32 *)((void *)kvm + offset); 1896 spin_unlock(&kvm_lock); 1897 return 0; 1898 } 1899 1900 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); 1901 1902 static int vcpu_stat_get(void *_offset, u64 *val) 1903 { 1904 unsigned offset = (long)_offset; 1905 struct kvm *kvm; 1906 struct kvm_vcpu *vcpu; 1907 int i; 1908 1909 *val = 0; 1910 spin_lock(&kvm_lock); 1911 list_for_each_entry(kvm, &vm_list, vm_list) 1912 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 1913 vcpu = kvm->vcpus[i]; 1914 if (vcpu) 1915 *val += *(u32 *)((void *)vcpu + offset); 1916 } 1917 spin_unlock(&kvm_lock); 1918 return 0; 1919 } 1920 1921 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); 1922 1923 static struct file_operations *stat_fops[] = { 1924 [KVM_STAT_VCPU] = &vcpu_stat_fops, 1925 [KVM_STAT_VM] = &vm_stat_fops, 1926 }; 1927 1928 static void kvm_init_debug(void) 1929 { 1930 struct kvm_stats_debugfs_item *p; 1931 1932 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); 1933 for (p = debugfs_entries; p->name; ++p) 1934 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, 1935 (void *)(long)p->offset, 1936 stat_fops[p->kind]); 1937 } 1938 1939 static void kvm_exit_debug(void) 1940 { 1941 struct kvm_stats_debugfs_item *p; 1942 1943 for (p = debugfs_entries; p->name; ++p) 1944 debugfs_remove(p->dentry); 1945 debugfs_remove(kvm_debugfs_dir); 1946 } 1947 1948 static int kvm_suspend(struct sys_device *dev, pm_message_t state) 1949 { 1950 hardware_disable(NULL); 1951 return 0; 1952 } 1953 1954 static int kvm_resume(struct sys_device *dev) 1955 { 1956 hardware_enable(NULL); 1957 return 0; 1958 } 1959 1960 static struct sysdev_class kvm_sysdev_class = { 1961 .name = "kvm", 1962 .suspend = kvm_suspend, 1963 .resume = kvm_resume, 1964 }; 1965 1966 static struct sys_device kvm_sysdev = { 1967 .id = 0, 1968 .cls = &kvm_sysdev_class, 1969 }; 1970 1971 struct page *bad_page; 1972 pfn_t bad_pfn; 1973 1974 static inline 1975 struct kvm_vcpu 
*preempt_notifier_to_vcpu(struct preempt_notifier *pn) 1976 { 1977 return container_of(pn, struct kvm_vcpu, preempt_notifier); 1978 } 1979 1980 static void kvm_sched_in(struct preempt_notifier *pn, int cpu) 1981 { 1982 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 1983 1984 kvm_arch_vcpu_load(vcpu, cpu); 1985 } 1986 1987 static void kvm_sched_out(struct preempt_notifier *pn, 1988 struct task_struct *next) 1989 { 1990 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 1991 1992 kvm_arch_vcpu_put(vcpu); 1993 } 1994 1995 int kvm_init(void *opaque, unsigned int vcpu_size, 1996 struct module *module) 1997 { 1998 int r; 1999 int cpu; 2000 2001 kvm_init_debug(); 2002 2003 r = kvm_arch_init(opaque); 2004 if (r) 2005 goto out_fail; 2006 2007 bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 2008 2009 if (bad_page == NULL) { 2010 r = -ENOMEM; 2011 goto out; 2012 } 2013 2014 bad_pfn = page_to_pfn(bad_page); 2015 2016 r = kvm_arch_hardware_setup(); 2017 if (r < 0) 2018 goto out_free_0; 2019 2020 for_each_online_cpu(cpu) { 2021 smp_call_function_single(cpu, 2022 kvm_arch_check_processor_compat, 2023 &r, 1); 2024 if (r < 0) 2025 goto out_free_1; 2026 } 2027 2028 on_each_cpu(hardware_enable, NULL, 1); 2029 r = register_cpu_notifier(&kvm_cpu_notifier); 2030 if (r) 2031 goto out_free_2; 2032 register_reboot_notifier(&kvm_reboot_notifier); 2033 2034 r = sysdev_class_register(&kvm_sysdev_class); 2035 if (r) 2036 goto out_free_3; 2037 2038 r = sysdev_register(&kvm_sysdev); 2039 if (r) 2040 goto out_free_4; 2041 2042 /* A kmem cache lets us meet the alignment requirements of fx_save. */ 2043 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, 2044 __alignof__(struct kvm_vcpu), 2045 0, NULL); 2046 if (!kvm_vcpu_cache) { 2047 r = -ENOMEM; 2048 goto out_free_5; 2049 } 2050 2051 kvm_chardev_ops.owner = module; 2052 2053 r = misc_register(&kvm_dev); 2054 if (r) { 2055 printk(KERN_ERR "kvm: misc device register failed\n"); 2056 goto out_free; 2057 } 2058 2059 kvm_preempt_ops.sched_in = kvm_sched_in; 2060 kvm_preempt_ops.sched_out = kvm_sched_out; 2061 2062 return 0; 2063 2064 out_free: 2065 kmem_cache_destroy(kvm_vcpu_cache); 2066 out_free_5: 2067 sysdev_unregister(&kvm_sysdev); 2068 out_free_4: 2069 sysdev_class_unregister(&kvm_sysdev_class); 2070 out_free_3: 2071 unregister_reboot_notifier(&kvm_reboot_notifier); 2072 unregister_cpu_notifier(&kvm_cpu_notifier); 2073 out_free_2: 2074 on_each_cpu(hardware_disable, NULL, 1); 2075 out_free_1: 2076 kvm_arch_hardware_unsetup(); 2077 out_free_0: 2078 __free_page(bad_page); 2079 out: 2080 kvm_arch_exit(); 2081 kvm_exit_debug(); 2082 out_fail: 2083 return r; 2084 } 2085 EXPORT_SYMBOL_GPL(kvm_init); 2086 2087 void kvm_exit(void) 2088 { 2089 kvm_trace_cleanup(); 2090 misc_deregister(&kvm_dev); 2091 kmem_cache_destroy(kvm_vcpu_cache); 2092 sysdev_unregister(&kvm_sysdev); 2093 sysdev_class_unregister(&kvm_sysdev_class); 2094 unregister_reboot_notifier(&kvm_reboot_notifier); 2095 unregister_cpu_notifier(&kvm_cpu_notifier); 2096 on_each_cpu(hardware_disable, NULL, 1); 2097 kvm_arch_hardware_unsetup(); 2098 kvm_arch_exit(); 2099 kvm_exit_debug(); 2100 __free_page(bad_page); 2101 } 2102 EXPORT_SYMBOL_GPL(kvm_exit); 2103
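
/*
 * Usage sketch (illustrative, not part of this file): an architecture
 * module is expected to call kvm_init() from its module_init() handler
 * and kvm_exit() from its module_exit() handler, passing its opaque ops
 * structure and vcpu size.  The identifiers vmx_init, vmx_exit,
 * vmx_x86_ops and struct vcpu_vmx below are assumptions used only to
 * show the calling convention.
 *
 *	static int __init vmx_init(void)
 *	{
 *		return kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 *				THIS_MODULE);
 *	}
 *
 *	static void __exit vmx_exit(void)
 *	{
 *		kvm_exit();
 *	}
 *
 *	module_init(vmx_init);
 *	module_exit(vmx_exit);
 */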