1 // SPDX-License-Identifier: GPL-2.0-only
3 * Kernel-based Virtual Machine (KVM) Hypervisor
74 MODULE_DESCRIPTION("Kernel-based Virtual Machine (KVM) Hypervisor");
82 /* Default doubles per-vcpu halt_poll_ns. */
92 /* Default halves per-vcpu halt_poll_ns. */
98 * Allow direct access (from KVM or the CPU) without MMU notifier protection
107 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
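
The ordering documented above means that any path needing more than one of these locks must take them outermost-first. A minimal sketch of a compliant helper (purely illustrative, not part of this file):

static void example_update_slots_and_routing(struct kvm *kvm)
{
        /* Honour the documented order: kvm->lock, then slots_lock, then irq_lock. */
        mutex_lock(&kvm->lock);
        mutex_lock(&kvm->slots_lock);
        mutex_lock(&kvm->irq_lock);

        /* ... update memslots and interrupt routing here ... */

        mutex_unlock(&kvm->irq_lock);
        mutex_unlock(&kvm->slots_lock);
        mutex_unlock(&kvm->lock);
}
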
132 * - Prevent a compat task from opening /dev/kvm
133 * - If the open has been done by a 64bit task, and the KVM fd
137 unsigned long arg) { return -EINVAL; } in kvm_no_compat_ioctl()
141 return is_compat_task() ? -ENODEV : 0; in kvm_no_compat_open()
169 preempt_notifier_register(&vcpu->preempt_notifier); in vcpu_load()
179 preempt_notifier_unregister(&vcpu->preempt_notifier); in vcpu_put()
238 cpu = READ_ONCE(vcpu->cpu); in kvm_make_vcpu_request()
239 if (cpu != -1 && cpu != current_cpu) in kvm_make_vcpu_request()
295 ++kvm->stat.generic.remote_tlb_flush_requests; in kvm_flush_remote_tlbs()
299 * mode. Pairs with a memory barrier in arch-specific code. in kvm_flush_remote_tlbs()
300 * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest in kvm_flush_remote_tlbs()
302 * - powerpc: smp_mb in kvmppc_prepare_to_enter. in kvm_flush_remote_tlbs()
305 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that in kvm_flush_remote_tlbs()
310 ++kvm->stat.generic.remote_tlb_flush; in kvm_flush_remote_tlbs()
320 * Fall back to flushing the entire TLB if the architecture's range-based in kvm_flush_remote_tlbs_range()
337 lockdep_assert_held(&kvm->slots_lock); in kvm_flush_remote_tlbs_memslot()
338 kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); in kvm_flush_remote_tlbs_memslot()
353 gfp_flags |= mc->gfp_zero; in mmu_memory_cache_alloc_obj()
355 if (mc->kmem_cache) in mmu_memory_cache_alloc_obj()
356 return kmem_cache_alloc(mc->kmem_cache, gfp_flags); in mmu_memory_cache_alloc_obj()
359 if (page && mc->init_value) in mmu_memory_cache_alloc_obj()
360 memset64(page, mc->init_value, PAGE_SIZE / sizeof(u64)); in mmu_memory_cache_alloc_obj()
366 gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT; in __kvm_mmu_topup_memory_cache()
369 if (mc->nobjs >= min) in __kvm_mmu_topup_memory_cache()
372 if (unlikely(!mc->objects)) { in __kvm_mmu_topup_memory_cache()
374 return -EIO; in __kvm_mmu_topup_memory_cache()
380 if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero))) in __kvm_mmu_topup_memory_cache()
381 return -EIO; in __kvm_mmu_topup_memory_cache()
383 mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp); in __kvm_mmu_topup_memory_cache()
384 if (!mc->objects) in __kvm_mmu_topup_memory_cache()
385 return -ENOMEM; in __kvm_mmu_topup_memory_cache()
387 mc->capacity = capacity; in __kvm_mmu_topup_memory_cache()
391 if (WARN_ON_ONCE(mc->capacity != capacity)) in __kvm_mmu_topup_memory_cache()
392 return -EIO; in __kvm_mmu_topup_memory_cache()
394 while (mc->nobjs < mc->capacity) { in __kvm_mmu_topup_memory_cache()
397 return mc->nobjs >= min ? 0 : -ENOMEM; in __kvm_mmu_topup_memory_cache()
398 mc->objects[mc->nobjs++] = obj; in __kvm_mmu_topup_memory_cache()
410 return mc->nobjs; in kvm_mmu_memory_cache_nr_free_objects()
415 while (mc->nobjs) { in kvm_mmu_free_memory_cache()
416 if (mc->kmem_cache) in kvm_mmu_free_memory_cache()
417 kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]); in kvm_mmu_free_memory_cache()
419 free_page((unsigned long)mc->objects[--mc->nobjs]); in kvm_mmu_free_memory_cache()
422 kvfree(mc->objects); in kvm_mmu_free_memory_cache()
424 mc->objects = NULL; in kvm_mmu_free_memory_cache()
425 mc->capacity = 0; in kvm_mmu_free_memory_cache()
432 if (WARN_ON(!mc->nobjs)) in kvm_mmu_memory_cache_alloc()
435 p = mc->objects[--mc->nobjs]; in kvm_mmu_memory_cache_alloc()
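
The cache machinery above lets page-fault handlers pre-allocate in a sleepable context and then allocate without the possibility of failure while holding mmu_lock. A hedged sketch of the usual topup-then-consume pattern (the fault-handler shape is illustrative, and on some architectures mmu_lock is a spinlock rather than an rwlock):

/* Kernel-side sketch; assumes <linux/kvm_host.h>. */
static int example_fault_path(struct kvm *kvm, struct kvm_mmu_memory_cache *mc)
{
        void *new_table_page;
        int r;

        /* Sleepable context: guarantee at least one object is available. */
        r = kvm_mmu_topup_memory_cache(mc, 1);
        if (r)
                return r;

        write_lock(&kvm->mmu_lock);
        /* Cannot fail after a successful topup (note the WARN_ON above). */
        new_table_page = kvm_mmu_memory_cache_alloc(mc);
        /* ... link new_table_page into the shadow / stage-2 page tables ... */
        write_unlock(&kvm->mmu_lock);
        return 0;
}
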
443 mutex_init(&vcpu->mutex); in kvm_vcpu_init()
444 vcpu->cpu = -1; in kvm_vcpu_init()
445 vcpu->kvm = kvm; in kvm_vcpu_init()
446 vcpu->vcpu_id = id; in kvm_vcpu_init()
447 vcpu->pid = NULL; in kvm_vcpu_init()
448 rwlock_init(&vcpu->pid_lock); in kvm_vcpu_init()
450 rcuwait_init(&vcpu->wait); in kvm_vcpu_init()
456 vcpu->preempted = false; in kvm_vcpu_init()
457 vcpu->ready = false; in kvm_vcpu_init()
458 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); in kvm_vcpu_init()
459 vcpu->last_used_slot = NULL; in kvm_vcpu_init()
462 snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", in kvm_vcpu_init()
469 kvm_dirty_ring_free(&vcpu->dirty_ring); in kvm_vcpu_destroy()
473 * the vcpu->pid pointer, and at destruction time all file descriptors in kvm_vcpu_destroy()
476 put_pid(vcpu->pid); in kvm_vcpu_destroy()
478 free_page((unsigned long)vcpu->run); in kvm_vcpu_destroy()
489 xa_erase(&kvm->vcpu_array, i); in kvm_destroy_vcpus()
493 * doesn't trigger a use-after-free if destroying vCPUs results in kvm_destroy_vcpus()
494 * in VM-wide request, e.g. to flush remote TLBs when tearing in kvm_destroy_vcpus()
497 WARN_ON_ONCE(xa_load(&kvm->vcpu_array, i) || kvm_get_vcpu(kvm, i)); in kvm_destroy_vcpus()
500 atomic_set(&kvm->online_vcpus, 0); in kvm_destroy_vcpus()
516 * 64-bit addresses, as KVM notifiers can operate on host virtual
517 * addresses (unsigned long) and guest physical addresses (64-bit).
530 * The inner-most helper returns a tuple containing the return value from the
531 * arch- and action-specific handler, plus a flag indicating whether or not at
535 * return from arch code as a bool, outer helpers will cast it to an int. :-(
545 * function will have a non-zero address, and so it will generate code to
557 for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
573 if (WARN_ON_ONCE(range->end <= range->start)) in kvm_handle_hva_range()
577 if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) && in kvm_handle_hva_range()
578 IS_KVM_NULL_FN(range->handler))) in kvm_handle_hva_range()
582 if (WARN_ON_ONCE(range->lockless && !IS_KVM_NULL_FN(range->on_lock))) in kvm_handle_hva_range()
585 idx = srcu_read_lock(&kvm->srcu); in kvm_handle_hva_range()
592 range->start, range->end - 1) { in kvm_handle_hva_range()
595 slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]); in kvm_handle_hva_range()
596 hva_start = max_t(unsigned long, range->start, slot->userspace_addr); in kvm_handle_hva_range()
597 hva_end = min_t(unsigned long, range->end, in kvm_handle_hva_range()
598 slot->userspace_addr + (slot->npages << PAGE_SHIFT)); in kvm_handle_hva_range()
606 gfn_range.arg = range->arg; in kvm_handle_hva_range()
607 gfn_range.may_block = range->may_block; in kvm_handle_hva_range()
609 * HVA-based notifications aren't relevant to private in kvm_handle_hva_range()
616 * {gfn_start, gfn_start+1, ..., gfn_end-1}. in kvm_handle_hva_range()
619 gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot); in kvm_handle_hva_range()
621 gfn_range.lockless = range->lockless; in kvm_handle_hva_range()
625 if (!range->lockless) { in kvm_handle_hva_range()
627 if (!IS_KVM_NULL_FN(range->on_lock)) in kvm_handle_hva_range()
628 range->on_lock(kvm); in kvm_handle_hva_range()
630 if (IS_KVM_NULL_FN(range->handler)) in kvm_handle_hva_range()
634 r.ret |= range->handler(kvm, &gfn_range); in kvm_handle_hva_range()
638 if (range->flush_on_ret && r.ret) in kvm_handle_hva_range()
642 if (r.found_memslot && !range->lockless) in kvm_handle_hva_range()
645 srcu_read_unlock(&kvm->srcu, idx); in kvm_handle_hva_range()
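
For reference, the hva-to-gfn conversion used in the loop above reduces to an offset within the slot; a sketch of the helper's logic, mirroring hva_to_gfn_memslot() from kvm_host.h (reproduced from memory, so treat it as an approximation):

static inline gfn_t example_hva_to_gfn(unsigned long hva,
                                       const struct kvm_memory_slot *slot)
{
        /* userspace_addr is the hva backing base_gfn, so the gfn is the
         * base plus the page index of the hva within the slot. */
        return slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
}

This is also why gfn_range.end is computed from hva_end + PAGE_SIZE - 1: the exclusive hva end is rounded up to the next page boundary so a partially covered last page is still included.
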
680 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_begin()
683 * spte can be established without taking the mmu_lock and in kvm_mmu_invalidate_begin()
686 kvm->mmu_invalidate_in_progress++; in kvm_mmu_invalidate_begin()
688 if (likely(kvm->mmu_invalidate_in_progress == 1)) { in kvm_mmu_invalidate_begin()
689 kvm->mmu_invalidate_range_start = INVALID_GPA; in kvm_mmu_invalidate_begin()
690 kvm->mmu_invalidate_range_end = INVALID_GPA; in kvm_mmu_invalidate_begin()
696 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_range_add()
698 WARN_ON_ONCE(!kvm->mmu_invalidate_in_progress); in kvm_mmu_invalidate_range_add()
700 if (likely(kvm->mmu_invalidate_range_start == INVALID_GPA)) { in kvm_mmu_invalidate_range_add()
701 kvm->mmu_invalidate_range_start = start; in kvm_mmu_invalidate_range_add()
702 kvm->mmu_invalidate_range_end = end; in kvm_mmu_invalidate_range_add()
713 kvm->mmu_invalidate_range_start = in kvm_mmu_invalidate_range_add()
714 min(kvm->mmu_invalidate_range_start, start); in kvm_mmu_invalidate_range_add()
715 kvm->mmu_invalidate_range_end = in kvm_mmu_invalidate_range_add()
716 max(kvm->mmu_invalidate_range_end, end); in kvm_mmu_invalidate_range_add()
722 kvm_mmu_invalidate_range_add(kvm, range->start, range->end); in kvm_mmu_unmap_gfn_range()
731 .start = range->start, in kvm_mmu_notifier_invalidate_range_start()
732 .end = range->end, in kvm_mmu_notifier_invalidate_range_start()
739 trace_kvm_unmap_hva_range(range->start, range->end); in kvm_mmu_notifier_invalidate_range_start()
744 * functions. Without that guarantee, the mmu_invalidate_in_progress in kvm_mmu_notifier_invalidate_range_start()
749 spin_lock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_start()
750 kvm->mn_active_invalidate_count++; in kvm_mmu_notifier_invalidate_range_start()
751 spin_unlock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_start()
759 * Because this runs without holding mmu_lock, the pfn caches must use in kvm_mmu_notifier_invalidate_range_start()
763 gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end); in kvm_mmu_notifier_invalidate_range_start()
778 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_end()
785 kvm->mmu_invalidate_seq++; in kvm_mmu_invalidate_end()
792 kvm->mmu_invalidate_in_progress--; in kvm_mmu_invalidate_end()
793 KVM_BUG_ON(kvm->mmu_invalidate_in_progress < 0, kvm); in kvm_mmu_invalidate_end()
799 WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA); in kvm_mmu_invalidate_end()
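
Consumers of this begin/end bookkeeping typically snapshot mmu_invalidate_seq before resolving a pfn outside mmu_lock, then retry if an invalidation ran in the meantime. A hedged sketch of that pattern (the fault-handler shape is illustrative; the retry helper lives in kvm_host.h, and mmu_lock may be a spinlock on some architectures):

static int example_map_gfn(struct kvm *kvm, gfn_t gfn)
{
        unsigned long mmu_seq = kvm->mmu_invalidate_seq;

        /* Order the sequence read before the pfn lookup; the arch fault
         * handlers use this same pattern. */
        smp_rmb();

        /* ... resolve gfn to a pfn here, possibly sleeping ... */

        write_lock(&kvm->mmu_lock);
        if (mmu_invalidate_retry(kvm, mmu_seq)) {
                write_unlock(&kvm->mmu_lock);
                return -EAGAIN; /* caller re-takes the fault */
        }
        /* ... safe to install the mapping for gfn ... */
        write_unlock(&kvm->mmu_lock);
        return 0;
}
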
807 .start = range->start, in kvm_mmu_notifier_invalidate_range_end()
808 .end = range->end, in kvm_mmu_notifier_invalidate_range_end()
819 spin_lock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_end()
820 if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count)) in kvm_mmu_notifier_invalidate_range_end()
821 --kvm->mn_active_invalidate_count; in kvm_mmu_notifier_invalidate_range_end()
822 wake = !kvm->mn_active_invalidate_count; in kvm_mmu_notifier_invalidate_range_end()
823 spin_unlock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_end()
830 rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); in kvm_mmu_notifier_invalidate_range_end()
853 * affect performance on pre-Haswell Intel EPT, where there is in kvm_mmu_notifier_clear_young()
883 idx = srcu_read_lock(&kvm->srcu); in kvm_mmu_notifier_release()
885 srcu_read_unlock(&kvm->srcu, idx); in kvm_mmu_notifier_release()
899 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; in kvm_init_mmu_notifier()
900 return mmu_notifier_register(&kvm->mmu_notifier, current->mm); in kvm_init_mmu_notifier()
924 kvm->pm_notifier.notifier_call = kvm_pm_notifier_call; in kvm_init_pm_notifier()
926 kvm->pm_notifier.priority = INT_MAX; in kvm_init_pm_notifier()
927 register_pm_notifier(&kvm->pm_notifier); in kvm_init_pm_notifier()
932 unregister_pm_notifier(&kvm->pm_notifier); in kvm_destroy_pm_notifier()
946 if (!memslot->dirty_bitmap) in kvm_destroy_dirty_bitmap()
949 vfree(memslot->dirty_bitmap); in kvm_destroy_dirty_bitmap()
950 memslot->dirty_bitmap = NULL; in kvm_destroy_dirty_bitmap()
956 if (slot->flags & KVM_MEM_GUEST_MEMFD) in kvm_free_memslot()
978 if (!slots->node_idx) in kvm_free_memslots()
981 hash_for_each_safe(slots->id_hash, bkt, idnode, memslot, id_node[1]) in kvm_free_memslots()
987 switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) { in kvm_stats_debugfs_mode()
1004 if (IS_ERR(kvm->debugfs_dentry)) in kvm_destroy_vm_debugfs()
1007 debugfs_remove_recursive(kvm->debugfs_dentry); in kvm_destroy_vm_debugfs()
1009 if (kvm->debugfs_stat_data) { in kvm_destroy_vm_debugfs()
1011 kfree(kvm->debugfs_stat_data[i]); in kvm_destroy_vm_debugfs()
1012 kfree(kvm->debugfs_stat_data); in kvm_destroy_vm_debugfs()
1023 int i, ret = -ENOMEM; in kvm_create_vm_debugfs()
1030 snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname); in kvm_create_vm_debugfs()
1044 kvm->debugfs_dentry = dent; in kvm_create_vm_debugfs()
1045 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, in kvm_create_vm_debugfs()
1046 sizeof(*kvm->debugfs_stat_data), in kvm_create_vm_debugfs()
1048 if (!kvm->debugfs_stat_data) in kvm_create_vm_debugfs()
1057 stat_data->kvm = kvm; in kvm_create_vm_debugfs()
1058 stat_data->desc = pdesc; in kvm_create_vm_debugfs()
1059 stat_data->kind = KVM_STAT_VM; in kvm_create_vm_debugfs()
1060 kvm->debugfs_stat_data[i] = stat_data; in kvm_create_vm_debugfs()
1061 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_create_vm_debugfs()
1062 kvm->debugfs_dentry, stat_data, in kvm_create_vm_debugfs()
1072 stat_data->kvm = kvm; in kvm_create_vm_debugfs()
1073 stat_data->desc = pdesc; in kvm_create_vm_debugfs()
1074 stat_data->kind = KVM_STAT_VCPU; in kvm_create_vm_debugfs()
1075 kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data; in kvm_create_vm_debugfs()
1076 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_create_vm_debugfs()
1077 kvm->debugfs_dentry, stat_data, in kvm_create_vm_debugfs()
1097 * Called after the per-VM debugfs directory is created. When called, kvm->debugfs_dentry should
1098 * already be set up, so we can create arch-specific debugfs entries under it.
1100 * a per-arch destroy interface is not needed.
1110 return rcu_dereference_protected(kvm->buses[idx], in kvm_get_bus_for_destruction()
1111 !refcount_read(&kvm->users_count)); in kvm_get_bus_for_destruction()
1121 return ERR_PTR(-ENOMEM); in kvm_create_vm()
1124 mmgrab(current->mm); in kvm_create_vm()
1125 kvm->mm = current->mm; in kvm_create_vm()
1127 mutex_init(&kvm->lock); in kvm_create_vm()
1128 mutex_init(&kvm->irq_lock); in kvm_create_vm()
1129 mutex_init(&kvm->slots_lock); in kvm_create_vm()
1130 mutex_init(&kvm->slots_arch_lock); in kvm_create_vm()
1131 spin_lock_init(&kvm->mn_invalidate_lock); in kvm_create_vm()
1132 rcuwait_init(&kvm->mn_memslots_update_rcuwait); in kvm_create_vm()
1133 xa_init(&kvm->vcpu_array); in kvm_create_vm()
1135 xa_init(&kvm->mem_attr_array); in kvm_create_vm()
1138 INIT_LIST_HEAD(&kvm->gpc_list); in kvm_create_vm()
1139 spin_lock_init(&kvm->gpc_lock); in kvm_create_vm()
1141 INIT_LIST_HEAD(&kvm->devices); in kvm_create_vm()
1142 kvm->max_vcpus = KVM_MAX_VCPUS; in kvm_create_vm()
1150 kvm->debugfs_dentry = ERR_PTR(-ENOENT); in kvm_create_vm()
1152 snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d", in kvm_create_vm()
1155 r = -ENOMEM; in kvm_create_vm()
1156 if (init_srcu_struct(&kvm->srcu)) in kvm_create_vm()
1158 if (init_srcu_struct(&kvm->irq_srcu)) in kvm_create_vm()
1165 refcount_set(&kvm->users_count, 1); in kvm_create_vm()
1169 slots = &kvm->__memslots[i][j]; in kvm_create_vm()
1171 atomic_long_set(&slots->last_used_slot, (unsigned long)NULL); in kvm_create_vm()
1172 slots->hva_tree = RB_ROOT_CACHED; in kvm_create_vm()
1173 slots->gfn_tree = RB_ROOT; in kvm_create_vm()
1174 hash_init(slots->id_hash); in kvm_create_vm()
1175 slots->node_idx = j; in kvm_create_vm()
1178 slots->generation = i; in kvm_create_vm()
1181 rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]); in kvm_create_vm()
1184 r = -ENOMEM; in kvm_create_vm()
1186 rcu_assign_pointer(kvm->buses[i], in kvm_create_vm()
1188 if (!kvm->buses[i]) in kvm_create_vm()
1201 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); in kvm_create_vm()
1217 list_add(&kvm->vm_list, &vm_list); in kvm_create_vm()
1229 if (kvm->mmu_notifier.ops) in kvm_create_vm()
1230 mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); in kvm_create_vm()
1237 WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); in kvm_create_vm()
1242 cleanup_srcu_struct(&kvm->irq_srcu); in kvm_create_vm()
1244 cleanup_srcu_struct(&kvm->srcu); in kvm_create_vm()
1247 mmdrop(current->mm); in kvm_create_vm()
1256 * We do not need to take the kvm->lock here, because nobody else in kvm_destroy_devices()
1264 * use-after-free, even though this cannot be guaranteed. in kvm_destroy_devices()
1266 list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { in kvm_destroy_devices()
1267 list_del(&dev->vm_node); in kvm_destroy_devices()
1268 dev->ops->destroy(dev); in kvm_destroy_devices()
1275 struct mm_struct *mm = kvm->mm; in kvm_destroy_vm()
1281 list_del(&kvm->vm_list); in kvm_destroy_vm()
1291 kvm->buses[i] = NULL; in kvm_destroy_vm()
1295 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); in kvm_destroy_vm()
1302 * memslots would deadlock without this manual intervention. in kvm_destroy_vm()
1306 * in-progress invalidations. in kvm_destroy_vm()
1308 WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait)); in kvm_destroy_vm()
1309 if (kvm->mn_active_invalidate_count) in kvm_destroy_vm()
1310 kvm->mn_active_invalidate_count = 0; in kvm_destroy_vm()
1312 WARN_ON(kvm->mmu_invalidate_in_progress); in kvm_destroy_vm()
1319 kvm_free_memslots(kvm, &kvm->__memslots[i][0]); in kvm_destroy_vm()
1320 kvm_free_memslots(kvm, &kvm->__memslots[i][1]); in kvm_destroy_vm()
1322 cleanup_srcu_struct(&kvm->irq_srcu); in kvm_destroy_vm()
1323 srcu_barrier(&kvm->srcu); in kvm_destroy_vm()
1324 cleanup_srcu_struct(&kvm->srcu); in kvm_destroy_vm()
1326 xa_destroy(&kvm->mem_attr_array); in kvm_destroy_vm()
1336 refcount_inc(&kvm->users_count); in kvm_get_kvm()
1346 return refcount_inc_not_zero(&kvm->users_count); in kvm_get_kvm_safe()
1352 if (refcount_dec_and_test(&kvm->users_count)) in kvm_put_kvm()
1359 * with a user-visible file descriptor, e.g. a vcpu or device, if installation
1360 * of the new file descriptor fails and the reference cannot be transferred to
1366 WARN_ON(refcount_dec_and_test(&kvm->users_count)); in kvm_put_kvm_no_destroy()
1372 struct kvm *kvm = filp->private_data; in kvm_vm_release()
1385 lockdep_assert_held(&kvm->lock); in kvm_trylock_all_vcpus()
1388 if (!mutex_trylock_nest_lock(&vcpu->mutex, &kvm->lock)) in kvm_trylock_all_vcpus()
1396 mutex_unlock(&vcpu->mutex); in kvm_trylock_all_vcpus()
1398 return -EINTR; in kvm_trylock_all_vcpus()
1408 lockdep_assert_held(&kvm->lock); in kvm_lock_all_vcpus()
1411 r = mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock); in kvm_lock_all_vcpus()
1421 mutex_unlock(&vcpu->mutex); in kvm_lock_all_vcpus()
1432 lockdep_assert_held(&kvm->lock); in kvm_unlock_all_vcpus()
1435 mutex_unlock(&vcpu->mutex); in kvm_unlock_all_vcpus()
1447 memslot->dirty_bitmap = __vcalloc(2, dirty_bytes, GFP_KERNEL_ACCOUNT); in kvm_alloc_dirty_bitmap()
1448 if (!memslot->dirty_bitmap) in kvm_alloc_dirty_bitmap()
1449 return -ENOMEM; in kvm_alloc_dirty_bitmap()
1457 int node_idx_inactive = active->node_idx ^ 1; in kvm_get_inactive_memslots()
1459 return &kvm->__memslots[as_id][node_idx_inactive]; in kvm_get_inactive_memslots()
1464 * This also serves as a sanity check that at least one of the pointers is non-NULL,
1474 return b->as_id; in kvm_memslots_get_as_id()
1476 return a->as_id; in kvm_memslots_get_as_id()
1478 WARN_ON_ONCE(a->as_id != b->as_id); in kvm_memslots_get_as_id()
1479 return a->as_id; in kvm_memslots_get_as_id()
1485 struct rb_root *gfn_tree = &slots->gfn_tree; in kvm_insert_gfn_node()
1487 int idx = slots->node_idx; in kvm_insert_gfn_node()
1490 for (node = &gfn_tree->rb_node; *node; ) { in kvm_insert_gfn_node()
1495 if (slot->base_gfn < tmp->base_gfn) in kvm_insert_gfn_node()
1496 node = &(*node)->rb_left; in kvm_insert_gfn_node()
1497 else if (slot->base_gfn > tmp->base_gfn) in kvm_insert_gfn_node()
1498 node = &(*node)->rb_right; in kvm_insert_gfn_node()
1503 rb_link_node(&slot->gfn_node[idx], parent, node); in kvm_insert_gfn_node()
1504 rb_insert_color(&slot->gfn_node[idx], gfn_tree); in kvm_insert_gfn_node()
1510 rb_erase(&slot->gfn_node[slots->node_idx], &slots->gfn_tree); in kvm_erase_gfn_node()
1517 int idx = slots->node_idx; in kvm_replace_gfn_node()
1519 WARN_ON_ONCE(old->base_gfn != new->base_gfn); in kvm_replace_gfn_node()
1521 rb_replace_node(&old->gfn_node[idx], &new->gfn_node[idx], in kvm_replace_gfn_node()
1522 &slots->gfn_tree); in kvm_replace_gfn_node()
1531 * If @new is non-NULL its hva_node[slots_idx] range has to be set
1540 int idx = slots->node_idx; in kvm_replace_memslot()
1543 hash_del(&old->id_node[idx]); in kvm_replace_memslot()
1544 interval_tree_remove(&old->hva_node[idx], &slots->hva_tree); in kvm_replace_memslot()
1546 if ((long)old == atomic_long_read(&slots->last_used_slot)) in kvm_replace_memslot()
1547 atomic_long_set(&slots->last_used_slot, (long)new); in kvm_replace_memslot()
1559 new->hva_node[idx].start = new->userspace_addr; in kvm_replace_memslot()
1560 new->hva_node[idx].last = new->userspace_addr + in kvm_replace_memslot()
1561 (new->npages << PAGE_SHIFT) - 1; in kvm_replace_memslot()
1568 hash_add(slots->id_hash, &new->id_node[idx], new->id); in kvm_replace_memslot()
1569 interval_tree_insert(&new->hva_node[idx], &slots->hva_tree); in kvm_replace_memslot()
1578 if (old && old->base_gfn == new->base_gfn) { in kvm_replace_memslot()
1604 if (mem->flags & KVM_MEM_GUEST_MEMFD) in check_memory_region_flags()
1608 * GUEST_MEMFD is incompatible with read-only memslots, as writes to in check_memory_region_flags()
1609 * read-only memslots have emulated MMIO, not page fault, semantics, in check_memory_region_flags()
1613 !(mem->flags & KVM_MEM_GUEST_MEMFD)) in check_memory_region_flags()
1616 if (mem->flags & ~valid_flags) in check_memory_region_flags()
1617 return -EINVAL; in check_memory_region_flags()
1627 u64 gen = __kvm_memslots(kvm, as_id)->generation; in kvm_swap_active_memslots()
1630 slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; in kvm_swap_active_memslots()
1637 spin_lock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1638 prepare_to_rcuwait(&kvm->mn_memslots_update_rcuwait); in kvm_swap_active_memslots()
1639 while (kvm->mn_active_invalidate_count) { in kvm_swap_active_memslots()
1641 spin_unlock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1643 spin_lock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1645 finish_rcuwait(&kvm->mn_memslots_update_rcuwait); in kvm_swap_active_memslots()
1646 rcu_assign_pointer(kvm->memslots[as_id], slots); in kvm_swap_active_memslots()
1647 spin_unlock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1654 mutex_unlock(&kvm->slots_arch_lock); in kvm_swap_active_memslots()
1656 synchronize_srcu_expedited(&kvm->srcu); in kvm_swap_active_memslots()
1660 * update in-progress flag and incrementing the generation based on in kvm_swap_active_memslots()
1664 gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; in kvm_swap_active_memslots()
1677 slots->generation = gen; in kvm_swap_active_memslots()
1695 if (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) in kvm_prepare_memory_region()
1696 new->dirty_bitmap = NULL; in kvm_prepare_memory_region()
1697 else if (old && old->dirty_bitmap) in kvm_prepare_memory_region()
1698 new->dirty_bitmap = old->dirty_bitmap; in kvm_prepare_memory_region()
1705 bitmap_set(new->dirty_bitmap, 0, new->npages); in kvm_prepare_memory_region()
1712 if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap)) in kvm_prepare_memory_region()
1723 int old_flags = old ? old->flags : 0; in kvm_commit_memory_region()
1724 int new_flags = new ? new->flags : 0; in kvm_commit_memory_region()
1730 kvm->nr_memslot_pages -= old->npages; in kvm_commit_memory_region()
1732 kvm->nr_memslot_pages += new->npages; in kvm_commit_memory_region()
1735 int change = (new_flags & KVM_MEM_LOG_DIRTY_PAGES) ? 1 : -1; in kvm_commit_memory_region()
1736 atomic_set(&kvm->nr_memslots_dirty_logging, in kvm_commit_memory_region()
1737 atomic_read(&kvm->nr_memslots_dirty_logging) + change); in kvm_commit_memory_region()
1756 if (old->dirty_bitmap && !new->dirty_bitmap) in kvm_commit_memory_region()
1795 dest->base_gfn = src->base_gfn; in kvm_copy_memslot()
1796 dest->npages = src->npages; in kvm_copy_memslot()
1797 dest->dirty_bitmap = src->dirty_bitmap; in kvm_copy_memslot()
1798 dest->arch = src->arch; in kvm_copy_memslot()
1799 dest->userspace_addr = src->userspace_addr; in kvm_copy_memslot()
1800 dest->flags = src->flags; in kvm_copy_memslot()
1801 dest->id = src->id; in kvm_copy_memslot()
1802 dest->as_id = src->as_id; in kvm_copy_memslot()
1815 invalid_slot->flags |= KVM_MEMSLOT_INVALID; in kvm_invalidate_memslot()
1823 kvm_swap_active_memslots(kvm, old->as_id); in kvm_invalidate_memslot()
1827 * memslot will be created. Validation of sp->gfn happens in: in kvm_invalidate_memslot()
1828 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) in kvm_invalidate_memslot()
1829 * - kvm_is_visible_gfn (mmu_check_root) in kvm_invalidate_memslot()
1835 mutex_lock(&kvm->slots_arch_lock); in kvm_invalidate_memslot()
1838 * Copy the arch-specific field of the newly-installed slot back to the in kvm_invalidate_memslot()
1840 * slots_arch_lock in kvm_swap_active_memslots() and re-acquiring the lock in kvm_invalidate_memslot()
1844 old->arch = invalid_slot->arch; in kvm_invalidate_memslot()
1915 mutex_lock(&kvm->slots_arch_lock); in kvm_set_memslot()
1921 * for the memslot when it is deleted/moved. Without pre-invalidation in kvm_set_memslot()
1922 * (and without a lock), a window would exist between effecting the in kvm_set_memslot()
1924 * guest could access a non-existent memslot. in kvm_set_memslot()
1927 * slot needs to be preserved in case a later step fails and the in kvm_set_memslot()
1933 mutex_unlock(&kvm->slots_arch_lock); in kvm_set_memslot()
1934 return -ENOMEM; in kvm_set_memslot()
1951 mutex_unlock(&kvm->slots_arch_lock); in kvm_set_memslot()
1979 * No need to refresh new->arch, changes after dropping slots_arch_lock in kvm_set_memslot()
1981 * responsible for knowing that new->arch may be stale. in kvm_set_memslot()
1994 if (iter.slot->id != id) in kvm_check_memslot_overlap()
2012 lockdep_assert_held(&kvm->slots_lock); in kvm_set_memory_region()
2018 as_id = mem->slot >> 16; in kvm_set_memory_region()
2019 id = (u16)mem->slot; in kvm_set_memory_region()
2022 if ((mem->memory_size & (PAGE_SIZE - 1)) || in kvm_set_memory_region()
2023 (mem->memory_size != (unsigned long)mem->memory_size)) in kvm_set_memory_region()
2024 return -EINVAL; in kvm_set_memory_region()
2025 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) in kvm_set_memory_region()
2026 return -EINVAL; in kvm_set_memory_region()
2028 if ((mem->userspace_addr & (PAGE_SIZE - 1)) || in kvm_set_memory_region()
2029 (mem->userspace_addr != untagged_addr(mem->userspace_addr)) || in kvm_set_memory_region()
2030 !access_ok((void __user *)(unsigned long)mem->userspace_addr, in kvm_set_memory_region()
2031 mem->memory_size)) in kvm_set_memory_region()
2032 return -EINVAL; in kvm_set_memory_region()
2033 if (mem->flags & KVM_MEM_GUEST_MEMFD && in kvm_set_memory_region()
2034 (mem->guest_memfd_offset & (PAGE_SIZE - 1) || in kvm_set_memory_region()
2035 mem->guest_memfd_offset + mem->memory_size < mem->guest_memfd_offset)) in kvm_set_memory_region()
2036 return -EINVAL; in kvm_set_memory_region()
2038 return -EINVAL; in kvm_set_memory_region()
2039 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) in kvm_set_memory_region()
2040 return -EINVAL; in kvm_set_memory_region()
2043 * The size of userspace-defined memory regions is restricted in order in kvm_set_memory_region()
2049 (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) in kvm_set_memory_region()
2050 return -EINVAL; in kvm_set_memory_region()
2060 if (!mem->memory_size) { in kvm_set_memory_region()
2061 if (!old || !old->npages) in kvm_set_memory_region()
2062 return -EINVAL; in kvm_set_memory_region()
2064 if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages)) in kvm_set_memory_region()
2065 return -EIO; in kvm_set_memory_region()
2070 base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT); in kvm_set_memory_region()
2071 npages = (mem->memory_size >> PAGE_SHIFT); in kvm_set_memory_region()
2073 if (!old || !old->npages) { in kvm_set_memory_region()
2080 if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages) in kvm_set_memory_region()
2081 return -EINVAL; in kvm_set_memory_region()
2084 if (mem->flags & KVM_MEM_GUEST_MEMFD) in kvm_set_memory_region()
2085 return -EINVAL; in kvm_set_memory_region()
2086 if ((mem->userspace_addr != old->userspace_addr) || in kvm_set_memory_region()
2087 (npages != old->npages) || in kvm_set_memory_region()
2088 ((mem->flags ^ old->flags) & KVM_MEM_READONLY)) in kvm_set_memory_region()
2089 return -EINVAL; in kvm_set_memory_region()
2091 if (base_gfn != old->base_gfn) in kvm_set_memory_region()
2093 else if (mem->flags != old->flags) in kvm_set_memory_region()
2101 return -EEXIST; in kvm_set_memory_region()
2106 return -ENOMEM; in kvm_set_memory_region()
2108 new->as_id = as_id; in kvm_set_memory_region()
2109 new->id = id; in kvm_set_memory_region()
2110 new->base_gfn = base_gfn; in kvm_set_memory_region()
2111 new->npages = npages; in kvm_set_memory_region()
2112 new->flags = mem->flags; in kvm_set_memory_region()
2113 new->userspace_addr = mem->userspace_addr; in kvm_set_memory_region()
2114 if (mem->flags & KVM_MEM_GUEST_MEMFD) { in kvm_set_memory_region()
2115 r = kvm_gmem_bind(kvm, new, mem->guest_memfd, mem->guest_memfd_offset); in kvm_set_memory_region()
2127 if (mem->flags & KVM_MEM_GUEST_MEMFD) in kvm_set_memory_region()
2137 if (WARN_ON_ONCE(mem->slot < KVM_USER_MEM_SLOTS)) in kvm_set_internal_memslot()
2138 return -EINVAL; in kvm_set_internal_memslot()
2140 if (WARN_ON_ONCE(mem->flags)) in kvm_set_internal_memslot()
2141 return -EINVAL; in kvm_set_internal_memslot()
2150 if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) in kvm_vm_ioctl_set_memory_region()
2151 return -EINVAL; in kvm_vm_ioctl_set_memory_region()
2153 guard(mutex)(&kvm->slots_lock); in kvm_vm_ioctl_set_memory_region()
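
The validation above mirrors what userspace passes through KVM_SET_USER_MEMORY_REGION: page-aligned guest_phys_addr, memory_size and userspace_addr, the slot id in the low 16 bits of 'slot' with the address-space id in the high 16 bits, and memory_size == 0 meaning delete. A minimal userspace sketch (error handling elided; the newer KVM_SET_USER_MEMORY_REGION2 variant adds the guest_memfd fields):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static int example_add_memslot(int vm_fd, unsigned long size)
{
        void *backing = mmap(NULL, size, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        struct kvm_userspace_memory_region region = {
                .slot = 0,                      /* low 16 bits: id, high 16 bits: as_id */
                .flags = 0,                     /* e.g. KVM_MEM_LOG_DIRTY_PAGES */
                .guest_phys_addr = 0x100000,    /* must be page aligned */
                .memory_size = size,            /* must be page aligned */
                .userspace_addr = (unsigned long)backing,
        };

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
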
2159 * kvm_get_dirty_log - get a snapshot of dirty pages
2175 return -ENXIO; in kvm_get_dirty_log()
2180 as_id = log->slot >> 16; in kvm_get_dirty_log()
2181 id = (u16)log->slot; in kvm_get_dirty_log()
2183 return -EINVAL; in kvm_get_dirty_log()
2187 if (!(*memslot) || !(*memslot)->dirty_bitmap) in kvm_get_dirty_log()
2188 return -ENOENT; in kvm_get_dirty_log()
2195 any = (*memslot)->dirty_bitmap[i]; in kvm_get_dirty_log()
2197 if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n)) in kvm_get_dirty_log()
2198 return -EFAULT; in kvm_get_dirty_log()
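
Userspace reaches this path through the KVM_GET_DIRTY_LOG ioctl, supplying the slot number and a bitmap with one bit per page of the slot, rounded up to a multiple of 64 bits. A rough sketch:

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static int example_get_dirty_log(int vm_fd, unsigned int slot, unsigned long npages)
{
        size_t bytes = ((npages + 63) / 64) * 8;  /* one bit per page, 64-bit aligned */
        void *bitmap = calloc(1, bytes);
        struct kvm_dirty_log log = {
                .slot = slot,                     /* as_id goes in the high 16 bits */
                .dirty_bitmap = bitmap,
        };
        int ret = ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);

        /* ... walk the bitmap and resynchronize dirty pages ... */
        free(bitmap);
        return ret;
}
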
2208 * kvm_get_dirty_log_protect - get a snapshot of dirty pages
2240 return -ENXIO; in kvm_get_dirty_log_protect()
2242 as_id = log->slot >> 16; in kvm_get_dirty_log_protect()
2243 id = (u16)log->slot; in kvm_get_dirty_log_protect()
2245 return -EINVAL; in kvm_get_dirty_log_protect()
2249 if (!memslot || !memslot->dirty_bitmap) in kvm_get_dirty_log_protect()
2250 return -ENOENT; in kvm_get_dirty_log_protect()
2252 dirty_bitmap = memslot->dirty_bitmap; in kvm_get_dirty_log_protect()
2258 if (kvm->manual_dirty_log_protect) { in kvm_get_dirty_log_protect()
2294 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) in kvm_get_dirty_log_protect()
2295 return -EFAULT; in kvm_get_dirty_log_protect()
2301 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
2305 * Steps 1-4 below provide a general overview of dirty page logging. See
2308 * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we
2324 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_get_dirty_log()
2328 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_get_dirty_log()
2333 * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
2352 return -ENXIO; in kvm_clear_dirty_log_protect()
2354 as_id = log->slot >> 16; in kvm_clear_dirty_log_protect()
2355 id = (u16)log->slot; in kvm_clear_dirty_log_protect()
2357 return -EINVAL; in kvm_clear_dirty_log_protect()
2359 if (log->first_page & 63) in kvm_clear_dirty_log_protect()
2360 return -EINVAL; in kvm_clear_dirty_log_protect()
2364 if (!memslot || !memslot->dirty_bitmap) in kvm_clear_dirty_log_protect()
2365 return -ENOENT; in kvm_clear_dirty_log_protect()
2367 dirty_bitmap = memslot->dirty_bitmap; in kvm_clear_dirty_log_protect()
2369 n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; in kvm_clear_dirty_log_protect()
2371 if (log->first_page > memslot->npages || in kvm_clear_dirty_log_protect()
2372 log->num_pages > memslot->npages - log->first_page || in kvm_clear_dirty_log_protect()
2373 (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) in kvm_clear_dirty_log_protect()
2374 return -EINVAL; in kvm_clear_dirty_log_protect()
2380 if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) in kvm_clear_dirty_log_protect()
2381 return -EFAULT; in kvm_clear_dirty_log_protect()
2384 for (offset = log->first_page, i = offset / BITS_PER_LONG, in kvm_clear_dirty_log_protect()
2385 n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--; in kvm_clear_dirty_log_protect()
2398 * a problem if userspace sets them in log->dirty_bitmap. in kvm_clear_dirty_log_protect()
2419 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_clear_dirty_log()
2423 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_clear_dirty_log()
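
The userspace counterpart is the KVM_CLEAR_DIRTY_LOG ioctl (available once KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled): as the checks above require, first_page must be a multiple of 64 and num_pages a multiple of 64 unless the range runs to the end of the slot. A sketch:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int example_clear_dirty_log(int vm_fd, unsigned int slot,
                                   __u64 first_page, __u32 num_pages, void *bitmap)
{
        struct kvm_clear_dirty_log clear = {
                .slot = slot,
                .num_pages = num_pages,   /* multiple of 64, or up to the slot end */
                .first_page = first_page, /* must be a multiple of 64 */
                .dirty_bitmap = bitmap,   /* bits to clear, as previously harvested */
        };

        return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
}
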
2444 XA_STATE(xas, &kvm->mem_attr_array, start); in kvm_range_has_memory_attributes()
2457 return !xas_find(&xas, end - 1); in kvm_range_has_memory_attributes()
2483 gfn_range.arg = range->arg; in kvm_handle_gfn_range()
2484 gfn_range.may_block = range->may_block; in kvm_handle_gfn_range()
2497 kvm_for_each_memslot_in_gfn_range(&iter, slots, range->start, range->end) { in kvm_handle_gfn_range()
2501 gfn_range.start = max(range->start, slot->base_gfn); in kvm_handle_gfn_range()
2502 gfn_range.end = min(range->end, slot->base_gfn + slot->npages); in kvm_handle_gfn_range()
2509 if (!IS_KVM_NULL_FN(range->on_lock)) in kvm_handle_gfn_range()
2510 range->on_lock(kvm); in kvm_handle_gfn_range()
2513 ret |= range->handler(kvm, &gfn_range); in kvm_handle_gfn_range()
2517 if (range->flush_on_ret && ret) in kvm_handle_gfn_range()
2538 kvm_mmu_invalidate_range_add(kvm, range->start, range->end); in kvm_pre_set_memory_attributes()
2572 mutex_lock(&kvm->slots_lock); in kvm_vm_set_mem_attributes()
2583 r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT); in kvm_vm_set_mem_attributes()
2593 r = xa_err(xa_store(&kvm->mem_attr_array, i, entry, in kvm_vm_set_mem_attributes()
2602 mutex_unlock(&kvm->slots_lock); in kvm_vm_set_mem_attributes()
2612 if (attrs->flags) in kvm_vm_ioctl_set_mem_attributes()
2613 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2614 if (attrs->attributes & ~kvm_supported_mem_attributes(kvm)) in kvm_vm_ioctl_set_mem_attributes()
2615 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2616 if (attrs->size == 0 || attrs->address + attrs->size < attrs->address) in kvm_vm_ioctl_set_mem_attributes()
2617 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2618 if (!PAGE_ALIGNED(attrs->address) || !PAGE_ALIGNED(attrs->size)) in kvm_vm_ioctl_set_mem_attributes()
2619 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2621 start = attrs->address >> PAGE_SHIFT; in kvm_vm_ioctl_set_mem_attributes()
2622 end = (attrs->address + attrs->size) >> PAGE_SHIFT; in kvm_vm_ioctl_set_mem_attributes()
2626 * KVM. For simplicity, generic attributes are supported only on 64-bit in kvm_vm_ioctl_set_mem_attributes()
2629 BUILD_BUG_ON(sizeof(attrs->attributes) != sizeof(unsigned long)); in kvm_vm_ioctl_set_mem_attributes()
2631 return kvm_vm_set_mem_attributes(kvm, start, end, attrs->attributes); in kvm_vm_ioctl_set_mem_attributes()
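
For completeness, the userspace view of this path: KVM_SET_MEMORY_ATTRIBUTES takes a page-aligned guest physical range plus the desired attribute bits (KVM_MEMORY_ATTRIBUTE_PRIVATE being the generic one used by guest_memfd-backed VMs), and flags must be zero, matching the checks above. A hedged sketch:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int example_make_range_private(int vm_fd, __u64 gpa, __u64 size)
{
        struct kvm_memory_attributes attrs = {
                .address = gpa,         /* must be page aligned */
                .size = size,           /* must be page aligned and non-zero */
                .attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
                .flags = 0,             /* must be zero */
        };

        return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}
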
2644 u64 gen = slots->generation; in kvm_vcpu_gfn_to_memslot()
2651 if (unlikely(gen != vcpu->last_used_slot_gen)) { in kvm_vcpu_gfn_to_memslot()
2652 vcpu->last_used_slot = NULL; in kvm_vcpu_gfn_to_memslot()
2653 vcpu->last_used_slot_gen = gen; in kvm_vcpu_gfn_to_memslot()
2656 slot = try_get_memslot(vcpu->last_used_slot, gfn); in kvm_vcpu_gfn_to_memslot()
2663 * thrashing the VM-wide last_used_slot in kvm_memslots. in kvm_vcpu_gfn_to_memslot()
2667 vcpu->last_used_slot = slot; in kvm_vcpu_gfn_to_memslot()
2702 mmap_read_lock(current->mm); in kvm_host_page_size()
2703 vma = find_vma(current->mm, addr); in kvm_host_page_size()
2710 mmap_read_unlock(current->mm); in kvm_host_page_size()
2717 return slot->flags & KVM_MEM_READONLY; in memslot_is_readonly()
2723 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) in __gfn_to_hva_many()
2730 *nr_pages = slot->npages - (gfn - slot->base_gfn); in __gfn_to_hva_many()
2796 * Per page-flags.h, pages tagged PG_reserved "should in general not be in kvm_is_ad_tracked_page()
2841 if (kfp->map_writable) in kvm_resolve_pfn()
2842 *kfp->map_writable = writable; in kvm_resolve_pfn()
2845 pfn = map->pfn; in kvm_resolve_pfn()
2849 *kfp->refcounted_page = page; in kvm_resolve_pfn()
2864 * Try the fast-only path when the caller wants to pin/get the page for in hva_to_pfn_fast()
2867 * breaks Copy-on-Write (CoW), e.g. so that KVM doesn't end up pointing in hva_to_pfn_fast()
2868 * at the old, read-only page while mm/ points at a new, writable page. in hva_to_pfn_fast()
2870 if (!((kfp->flags & FOLL_WRITE) || kfp->map_writable)) in hva_to_pfn_fast()
2873 if (kfp->pin) in hva_to_pfn_fast()
2874 r = pin_user_pages_fast(kfp->hva, 1, FOLL_WRITE, &page) == 1; in hva_to_pfn_fast()
2876 r = get_user_page_fast_only(kfp->hva, FOLL_WRITE, &page); in hva_to_pfn_fast()
2888 * 1 indicates success; -errno is returned if an error is detected.
2903 unsigned int flags = FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT | kfp->flags; in hva_to_pfn_slow()
2907 if (kfp->pin) in hva_to_pfn_slow()
2908 npages = pin_user_pages_unlocked(kfp->hva, 1, &page, flags); in hva_to_pfn_slow()
2910 npages = get_user_pages_unlocked(kfp->hva, 1, &page, flags); in hva_to_pfn_slow()
2919 if (WARN_ON_ONCE(kfp->map_writable && kfp->pin)) in hva_to_pfn_slow()
2923 if (!(flags & FOLL_WRITE) && kfp->map_writable && in hva_to_pfn_slow()
2924 get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) { in hva_to_pfn_slow()
2937 if (unlikely(!(vma->vm_flags & VM_READ))) in vma_is_valid()
2940 if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) in vma_is_valid()
2949 struct follow_pfnmap_args args = { .vma = vma, .address = kfp->hva }; in hva_to_pfn_remapped()
2950 bool write_fault = kfp->flags & FOLL_WRITE; in hva_to_pfn_remapped()
2958 if (kfp->pin && !allow_unsafe_mappings) in hva_to_pfn_remapped()
2959 return -EINVAL; in hva_to_pfn_remapped()
2964 * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does in hva_to_pfn_remapped()
2968 r = fixup_user_fault(current->mm, kfp->hva, in hva_to_pfn_remapped()
2972 return -EAGAIN; in hva_to_pfn_remapped()
3000 if (WARN_ON_ONCE(!kfp->refcounted_page)) in hva_to_pfn()
3009 if (npages == -EINTR || npages == -EAGAIN) in hva_to_pfn()
3011 if (npages == -EHWPOISON) in hva_to_pfn()
3014 mmap_read_lock(current->mm); in hva_to_pfn()
3016 vma = vma_lookup(current->mm, kfp->hva); in hva_to_pfn()
3020 else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { in hva_to_pfn()
3022 if (r == -EAGAIN) in hva_to_pfn()
3027 if ((kfp->flags & FOLL_NOWAIT) && in hva_to_pfn()
3028 vma_is_valid(vma, kfp->flags & FOLL_WRITE)) in hva_to_pfn()
3033 mmap_read_unlock(current->mm); in hva_to_pfn()
3039 kfp->hva = __gfn_to_hva_many(kfp->slot, kfp->gfn, NULL, in kvm_follow_pfn()
3040 kfp->flags & FOLL_WRITE); in kvm_follow_pfn()
3042 if (kfp->hva == KVM_HVA_ERR_RO_BAD) in kvm_follow_pfn()
3045 if (kvm_is_error_hva(kfp->hva)) in kvm_follow_pfn()
3048 if (memslot_is_readonly(kfp->slot) && kfp->map_writable) { in kvm_follow_pfn()
3049 *kfp->map_writable = false; in kvm_follow_pfn()
3050 kfp->map_writable = NULL; in kvm_follow_pfn()
3086 return -1; in kvm_prefetch_pages()
3121 .slot = gfn_to_memslot(vcpu->kvm, gfn), in __kvm_vcpu_map()
3124 .refcounted_page = &map->pinned_page, in __kvm_vcpu_map()
3128 map->pinned_page = NULL; in __kvm_vcpu_map()
3129 map->page = NULL; in __kvm_vcpu_map()
3130 map->hva = NULL; in __kvm_vcpu_map()
3131 map->gfn = gfn; in __kvm_vcpu_map()
3132 map->writable = writable; in __kvm_vcpu_map()
3134 map->pfn = kvm_follow_pfn(&kfp); in __kvm_vcpu_map()
3135 if (is_error_noslot_pfn(map->pfn)) in __kvm_vcpu_map()
3136 return -EINVAL; in __kvm_vcpu_map()
3138 if (pfn_valid(map->pfn)) { in __kvm_vcpu_map()
3139 map->page = pfn_to_page(map->pfn); in __kvm_vcpu_map()
3140 map->hva = kmap(map->page); in __kvm_vcpu_map()
3143 map->hva = memremap(pfn_to_hpa(map->pfn), PAGE_SIZE, MEMREMAP_WB); in __kvm_vcpu_map()
3147 return map->hva ? 0 : -EFAULT; in __kvm_vcpu_map()
3153 if (!map->hva) in kvm_vcpu_unmap()
3156 if (map->page) in kvm_vcpu_unmap()
3157 kunmap(map->page); in kvm_vcpu_unmap()
3160 memunmap(map->hva); in kvm_vcpu_unmap()
3163 if (map->writable) in kvm_vcpu_unmap()
3164 kvm_vcpu_mark_page_dirty(vcpu, map->gfn); in kvm_vcpu_unmap()
3166 if (map->pinned_page) { in kvm_vcpu_unmap()
3167 if (map->writable) in kvm_vcpu_unmap()
3168 kvm_set_page_dirty(map->pinned_page); in kvm_vcpu_unmap()
3169 kvm_set_page_accessed(map->pinned_page); in kvm_vcpu_unmap()
3170 unpin_user_page(map->pinned_page); in kvm_vcpu_unmap()
3173 map->hva = NULL; in kvm_vcpu_unmap()
3174 map->page = NULL; in kvm_vcpu_unmap()
3175 map->pinned_page = NULL; in kvm_vcpu_unmap()
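
A typical in-kernel consumer maps a guest frame, accesses it through map.hva, and unmaps it again; dirtying and unpinning are handled by kvm_vcpu_unmap() based on map->writable, as seen above. A hedged sketch built on the __kvm_vcpu_map() signature visible here (thin wrappers such as kvm_vcpu_map() live in kvm_host.h):

/* Kernel-side sketch; assumes <linux/kvm_host.h>. */
static int example_poke_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
        struct kvm_host_map map;
        int r;

        r = __kvm_vcpu_map(vcpu, gfn, &map, /*writable=*/true);
        if (r)
                return r;

        /* map.hva is a kernel mapping of the guest page. */
        memset(map.hva, 0, PAGE_SIZE);

        /* Marks the page dirty and drops the kmap/pin as appropriate. */
        kvm_vcpu_unmap(vcpu, &map);
        return 0;
}
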
3181 if (len > PAGE_SIZE - offset) in next_segment()
3182 return PAGE_SIZE - offset; in next_segment()
3195 return -EFAULT; in __kvm_read_guest_page()
3199 return -EFAULT; in __kvm_read_guest_page()
3202 return -EFAULT; in __kvm_read_guest_page()
3236 len -= seg; in kvm_read_guest()
3256 len -= seg; in kvm_vcpu_read_guest()
3271 return -EFAULT; in __kvm_read_guest_atomic()
3275 return -EFAULT; in __kvm_read_guest_atomic()
3280 return -EFAULT; in __kvm_read_guest_atomic()
3304 return -EFAULT; in __kvm_write_guest_page()
3308 return -EFAULT; in __kvm_write_guest_page()
3311 return -EFAULT; in __kvm_write_guest_page()
3330 return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len); in kvm_vcpu_write_guest_page()
3347 len -= seg; in kvm_write_guest()
3368 len -= seg; in kvm_vcpu_write_guest()
3382 gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; in __kvm_gfn_to_hva_cache_init()
3383 gfn_t nr_pages_needed = end_gfn - start_gfn + 1; in __kvm_gfn_to_hva_cache_init()
3386 /* Update ghc->generation before performing any error checks. */ in __kvm_gfn_to_hva_cache_init()
3387 ghc->generation = slots->generation; in __kvm_gfn_to_hva_cache_init()
3390 ghc->hva = KVM_HVA_ERR_BAD; in __kvm_gfn_to_hva_cache_init()
3391 return -EINVAL; in __kvm_gfn_to_hva_cache_init()
3399 ghc->memslot = __gfn_to_memslot(slots, start_gfn); in __kvm_gfn_to_hva_cache_init()
3400 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, in __kvm_gfn_to_hva_cache_init()
3402 if (kvm_is_error_hva(ghc->hva)) in __kvm_gfn_to_hva_cache_init()
3403 return -EFAULT; in __kvm_gfn_to_hva_cache_init()
3408 ghc->hva += offset; in __kvm_gfn_to_hva_cache_init()
3410 ghc->memslot = NULL; in __kvm_gfn_to_hva_cache_init()
3412 ghc->gpa = gpa; in __kvm_gfn_to_hva_cache_init()
3413 ghc->len = len; in __kvm_gfn_to_hva_cache_init()
3431 gpa_t gpa = ghc->gpa + offset; in kvm_write_guest_offset_cached()
3433 if (WARN_ON_ONCE(len + offset > ghc->len)) in kvm_write_guest_offset_cached()
3434 return -EINVAL; in kvm_write_guest_offset_cached()
3436 if (slots->generation != ghc->generation) { in kvm_write_guest_offset_cached()
3437 if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) in kvm_write_guest_offset_cached()
3438 return -EFAULT; in kvm_write_guest_offset_cached()
3441 if (kvm_is_error_hva(ghc->hva)) in kvm_write_guest_offset_cached()
3442 return -EFAULT; in kvm_write_guest_offset_cached()
3444 if (unlikely(!ghc->memslot)) in kvm_write_guest_offset_cached()
3447 r = __copy_to_user((void __user *)ghc->hva + offset, data, len); in kvm_write_guest_offset_cached()
3449 return -EFAULT; in kvm_write_guest_offset_cached()
3450 mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT); in kvm_write_guest_offset_cached()
3469 gpa_t gpa = ghc->gpa + offset; in kvm_read_guest_offset_cached()
3471 if (WARN_ON_ONCE(len + offset > ghc->len)) in kvm_read_guest_offset_cached()
3472 return -EINVAL; in kvm_read_guest_offset_cached()
3474 if (slots->generation != ghc->generation) { in kvm_read_guest_offset_cached()
3475 if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) in kvm_read_guest_offset_cached()
3476 return -EFAULT; in kvm_read_guest_offset_cached()
3479 if (kvm_is_error_hva(ghc->hva)) in kvm_read_guest_offset_cached()
3480 return -EFAULT; in kvm_read_guest_offset_cached()
3482 if (unlikely(!ghc->memslot)) in kvm_read_guest_offset_cached()
3485 r = __copy_from_user(data, (void __user *)ghc->hva + offset, len); in kvm_read_guest_offset_cached()
3487 return -EFAULT; in kvm_read_guest_offset_cached()
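
The cached variants are intended for guest structures that are read or written repeatedly: the cache is initialized once for a gpa/length and then reused, with the generation check above transparently re-resolving the hva after memslot changes. A minimal sketch of the usual split between setup and hot path:

/* Kernel-side sketch; assumes <linux/kvm_host.h>. */
static int example_init_shared_counter(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                                       gpa_t gpa)
{
        /* Done once, or whenever the guest relocates the structure. */
        return kvm_gfn_to_hva_cache_init(kvm, ghc, gpa, sizeof(u64));
}

static int example_publish_counter(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                                   u64 value)
{
        /* Hot path: revalidates against slots->generation internally. */
        return kvm_write_guest_cached(kvm, ghc, &value, sizeof(value));
}
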
3513 len -= seg; in kvm_clear_guest()
3527 if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm)) in mark_page_dirty_in_slot()
3534 unsigned long rel_gfn = gfn - memslot->base_gfn; in mark_page_dirty_in_slot()
3535 u32 slot = (memslot->as_id << 16) | memslot->id; in mark_page_dirty_in_slot()
3537 if (kvm->dirty_ring_size && vcpu) in mark_page_dirty_in_slot()
3539 else if (memslot->dirty_bitmap) in mark_page_dirty_in_slot()
3540 set_bit_le(rel_gfn, memslot->dirty_bitmap); in mark_page_dirty_in_slot()
3559 mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn); in kvm_vcpu_mark_page_dirty()
3565 if (!vcpu->sigset_active) in kvm_sigset_activate()
3569 * This does a lockless modification of ->real_blocked, which is fine in kvm_sigset_activate()
3570 * because, only current can change ->real_blocked and all readers of in kvm_sigset_activate()
3571 * ->real_blocked don't care as long ->real_blocked is always a subset in kvm_sigset_activate()
3572 * of ->blocked. in kvm_sigset_activate()
3574 sigprocmask(SIG_SETMASK, &vcpu->sigset, &current->real_blocked); in kvm_sigset_activate()
3579 if (!vcpu->sigset_active) in kvm_sigset_deactivate()
3582 sigprocmask(SIG_SETMASK, &current->real_blocked, NULL); in kvm_sigset_deactivate()
3583 sigemptyset(&current->real_blocked); in kvm_sigset_deactivate()
3590 old = val = vcpu->halt_poll_ns; in grow_halt_poll_ns()
3600 vcpu->halt_poll_ns = val; in grow_halt_poll_ns()
3602 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); in grow_halt_poll_ns()
3609 old = val = vcpu->halt_poll_ns; in shrink_halt_poll_ns()
3620 vcpu->halt_poll_ns = val; in shrink_halt_poll_ns()
3621 trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); in shrink_halt_poll_ns()
3626 int ret = -EINTR; in kvm_vcpu_check_block()
3627 int idx = srcu_read_lock(&vcpu->kvm->srcu); in kvm_vcpu_check_block()
3640 srcu_read_unlock(&vcpu->kvm->srcu, idx); in kvm_vcpu_check_block()
3647 * directly for other vCPU non-runnable states, e.g. x86's Wait-For-SIPI.
3654 vcpu->stat.generic.blocking = 1; in kvm_vcpu_block()
3676 vcpu->stat.generic.blocking = 0; in kvm_vcpu_block()
3684 struct kvm_vcpu_stat_generic *stats = &vcpu->stat.generic; in update_halt_poll_stats()
3687 ++vcpu->stat.generic.halt_attempted_poll; in update_halt_poll_stats()
3690 ++vcpu->stat.generic.halt_successful_poll; in update_halt_poll_stats()
3693 ++vcpu->stat.generic.halt_poll_invalid; in update_halt_poll_stats()
3695 stats->halt_poll_success_ns += poll_ns; in update_halt_poll_stats()
3696 KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_success_hist, poll_ns); in update_halt_poll_stats()
3698 stats->halt_poll_fail_ns += poll_ns; in update_halt_poll_stats()
3699 KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_fail_hist, poll_ns); in update_halt_poll_stats()
3705 struct kvm *kvm = vcpu->kvm; in kvm_vcpu_max_halt_poll_ns()
3707 if (kvm->override_halt_poll_ns) { in kvm_vcpu_max_halt_poll_ns()
3709 * Ensure kvm->max_halt_poll_ns is not read before in kvm_vcpu_max_halt_poll_ns()
3710 * kvm->override_halt_poll_ns. in kvm_vcpu_max_halt_poll_ns()
3715 return READ_ONCE(kvm->max_halt_poll_ns); in kvm_vcpu_max_halt_poll_ns()
3736 if (vcpu->halt_poll_ns > max_halt_poll_ns) in kvm_vcpu_halt()
3737 vcpu->halt_poll_ns = max_halt_poll_ns; in kvm_vcpu_halt()
3739 do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; in kvm_vcpu_halt()
3743 ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); in kvm_vcpu_halt()
3757 vcpu->stat.generic.halt_wait_ns += in kvm_vcpu_halt()
3758 ktime_to_ns(cur) - ktime_to_ns(poll_end); in kvm_vcpu_halt()
3759 KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.generic.halt_wait_hist, in kvm_vcpu_halt()
3760 ktime_to_ns(cur) - ktime_to_ns(poll_end)); in kvm_vcpu_halt()
3764 halt_ns = ktime_to_ns(cur) - ktime_to_ns(start); in kvm_vcpu_halt()
3767 * Note, halt-polling is considered successful so long as the vCPU was in kvm_vcpu_halt()
3769 * after the halt-polling loop itself, but before the full wait. in kvm_vcpu_halt()
3781 if (halt_ns <= vcpu->halt_poll_ns) in kvm_vcpu_halt()
3784 else if (vcpu->halt_poll_ns && in kvm_vcpu_halt()
3788 else if (vcpu->halt_poll_ns < max_halt_poll_ns && in kvm_vcpu_halt()
3792 vcpu->halt_poll_ns = 0; in kvm_vcpu_halt()
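
The per-VM ceiling consulted above (max_halt_poll_ns, gated by override_halt_poll_ns) is what userspace tunes via KVM_ENABLE_CAP with KVM_CAP_HALT_POLL on the VM fd; a sketch:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int example_set_halt_poll(int vm_fd, __u64 max_poll_ns)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_HALT_POLL,
                .args = { max_poll_ns },  /* 0 disables halt polling for this VM */
        };

        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
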
3803 WRITE_ONCE(vcpu->ready, true); in kvm_vcpu_wake_up()
3804 ++vcpu->stat.generic.halt_wakeup; in kvm_vcpu_wake_up()
3831 if (vcpu->mode == IN_GUEST_MODE) in __kvm_vcpu_kick()
3832 WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE); in __kvm_vcpu_kick()
3844 cpu = READ_ONCE(vcpu->cpu); in __kvm_vcpu_kick()
3851 * deadlock due to taking cross-CPU locks. in __kvm_vcpu_kick()
3870 if (!read_trylock(&target->pid_lock)) in kvm_vcpu_yield_to()
3873 if (target->pid) in kvm_vcpu_yield_to()
3874 task = get_pid_task(target->pid, PIDTYPE_PID); in kvm_vcpu_yield_to()
3876 read_unlock(&target->pid_lock); in kvm_vcpu_yield_to()
3891 * (a) a VCPU which has not recently taken a PLE exit or had cpu_relax() intercepted
3895 * (b) a VCPU which has taken a PLE exit / had cpu_relax() intercepted but did not get
3900 * Yielding to a VCPU that recently took a PLE exit or cpu_relax() intercept, before yielding
3901 * to a preempted lock holder, could result in wrong VCPU selection and CPU
3902 * burning. Giving priority to a potential lock holder increases lock
3905 * Since the algorithm is based on heuristics, accessing another VCPU's data without
3914 eligible = !vcpu->spin_loop.in_spin_loop || in kvm_vcpu_eligible_for_directed_yield()
3915 vcpu->spin_loop.dy_eligible; in kvm_vcpu_eligible_for_directed_yield()
3917 if (vcpu->spin_loop.in_spin_loop) in kvm_vcpu_eligible_for_directed_yield()
3918 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); in kvm_vcpu_eligible_for_directed_yield()
3942 if (!list_empty_careful(&vcpu->async_pf.done)) in vcpu_dy_runnable()
3954 * directly for cross-vCPU checks is functionally correct and accurate.
3969 struct kvm *kvm = me->kvm; in kvm_vcpu_on_spin()
3973 nr_vcpus = atomic_read(&kvm->online_vcpus); in kvm_vcpu_on_spin()
3993 * approximate a round-robin selection by iterating over all vCPUs, in kvm_vcpu_on_spin()
3994 * starting at the last boosted vCPU. I.e. if N=kvm->last_boosted_vcpu, in kvm_vcpu_on_spin()
3995 * iterate over vCPU[N+1]..vCPU[N-1], wrapping as needed. in kvm_vcpu_on_spin()
4001 start = READ_ONCE(kvm->last_boosted_vcpu) + 1; in kvm_vcpu_on_spin()
4004 if (idx == me->vcpu_idx) in kvm_vcpu_on_spin()
4007 vcpu = xa_load(&kvm->vcpu_array, idx); in kvm_vcpu_on_spin()
4008 if (!READ_ONCE(vcpu->ready)) in kvm_vcpu_on_spin()
4014 * Treat the target vCPU as being in-kernel if it has a pending in kvm_vcpu_on_spin()
4016 * waiting on IPI delivery, i.e. the target vCPU is in-kernel in kvm_vcpu_on_spin()
4019 if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && in kvm_vcpu_on_spin()
4029 WRITE_ONCE(kvm->last_boosted_vcpu, i); in kvm_vcpu_on_spin()
4031 } else if (yielded < 0 && !--try) { in kvm_vcpu_on_spin()
4047 kvm->dirty_ring_size / PAGE_SIZE); in kvm_page_in_dirty_ring()
4055 struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; in kvm_vcpu_fault()
4058 if (vmf->pgoff == 0) in kvm_vcpu_fault()
4059 page = virt_to_page(vcpu->run); in kvm_vcpu_fault()
4061 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) in kvm_vcpu_fault()
4062 page = virt_to_page(vcpu->arch.pio_data); in kvm_vcpu_fault()
4065 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) in kvm_vcpu_fault()
4066 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); in kvm_vcpu_fault()
4068 else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff)) in kvm_vcpu_fault()
4070 &vcpu->dirty_ring, in kvm_vcpu_fault()
4071 vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET); in kvm_vcpu_fault()
4075 vmf->page = page; in kvm_vcpu_fault()
4085 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_mmap()
4088 if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) || in kvm_vcpu_mmap()
4089 kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) && in kvm_vcpu_mmap()
4090 ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED))) in kvm_vcpu_mmap()
4091 return -EINVAL; in kvm_vcpu_mmap()
4093 vma->vm_ops = &kvm_vcpu_vm_ops; in kvm_vcpu_mmap()
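
On the userspace side these offsets are reached by mmap()ing the vCPU file descriptor: page 0 is struct kvm_run, and the total size reported by KVM_GET_VCPU_MMAP_SIZE on the /dev/kvm fd also covers the PIO and coalesced-MMIO pages on architectures that have them, plus the dirty ring when enabled. A sketch:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static struct kvm_run *example_map_run(int kvm_fd, int vcpu_fd)
{
        int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        void *run;

        if (size < 0)
                return NULL;

        /* Offset 0 of the vCPU fd is struct kvm_run (see kvm_vcpu_fault()). */
        run = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
        return run == MAP_FAILED ? NULL : (struct kvm_run *)run;
}
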
4099 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_release()
4101 kvm_put_kvm(vcpu->kvm); in kvm_vcpu_release()
4120 snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); in create_vcpu_fd()
4129 read_lock(&vcpu->pid_lock); in vcpu_get_pid()
4130 *val = pid_nr(vcpu->pid); in vcpu_get_pid()
4131 read_unlock(&vcpu->pid_lock); in vcpu_get_pid()
4145 snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); in kvm_create_vcpu_debugfs()
4147 vcpu->kvm->debugfs_dentry); in kvm_create_vcpu_debugfs()
4166 * too-large values instead of silently truncating. in kvm_vm_ioctl_create_vcpu()
4168 * Ensure KVM_MAX_VCPU_IDS isn't pushed above INT_MAX without first in kvm_vm_ioctl_create_vcpu()
4174 return -EINVAL; in kvm_vm_ioctl_create_vcpu()
4176 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4177 if (kvm->created_vcpus >= kvm->max_vcpus) { in kvm_vm_ioctl_create_vcpu()
4178 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4179 return -EINVAL; in kvm_vm_ioctl_create_vcpu()
4184 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4188 kvm->created_vcpus++; in kvm_vm_ioctl_create_vcpu()
4189 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4193 r = -ENOMEM; in kvm_vm_ioctl_create_vcpu()
4200 r = -ENOMEM; in kvm_vm_ioctl_create_vcpu()
4203 vcpu->run = page_address(page); in kvm_vm_ioctl_create_vcpu()
4211 if (kvm->dirty_ring_size) { in kvm_vm_ioctl_create_vcpu()
4212 r = kvm_dirty_ring_alloc(kvm, &vcpu->dirty_ring, in kvm_vm_ioctl_create_vcpu()
4213 id, kvm->dirty_ring_size); in kvm_vm_ioctl_create_vcpu()
4218 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4221 r = -EEXIST; in kvm_vm_ioctl_create_vcpu()
4225 vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); in kvm_vm_ioctl_create_vcpu()
4226 r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT); in kvm_vm_ioctl_create_vcpu()
4227 WARN_ON_ONCE(r == -EBUSY); in kvm_vm_ioctl_create_vcpu()
4235 * into a NULL-pointer dereference because KVM thinks the _current_ in kvm_vm_ioctl_create_vcpu()
4236 * vCPU doesn't exist. As a bonus, taking vcpu->mutex ensures lockdep in kvm_vm_ioctl_create_vcpu()
4237 * knows it's taken *inside* kvm->lock. in kvm_vm_ioctl_create_vcpu()
4239 mutex_lock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4247 * pointer before kvm->online_vcpu's incremented value. in kvm_vm_ioctl_create_vcpu()
4250 atomic_inc(&kvm->online_vcpus); in kvm_vm_ioctl_create_vcpu()
4251 mutex_unlock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4253 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4259 mutex_unlock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4261 xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx); in kvm_vm_ioctl_create_vcpu()
4263 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4264 kvm_dirty_ring_free(&vcpu->dirty_ring); in kvm_vm_ioctl_create_vcpu()
4268 free_page((unsigned long)vcpu->run); in kvm_vm_ioctl_create_vcpu()
4272 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4273 kvm->created_vcpus--; in kvm_vm_ioctl_create_vcpu()
4274 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
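
From userspace, this whole path is triggered by KVM_CREATE_VCPU on a VM fd obtained from KVM_CREATE_VM; the returned vCPU fd is what later gets mmap()ed and driven with KVM_RUN. A sketch (error handling elided):

#include <fcntl.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int example_create_vcpu(void)
{
        int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
        int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);   /* 0 = default machine type */

        return ioctl(vm_fd, KVM_CREATE_VCPU, 0);       /* vcpu id 0 */
}
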
4282 vcpu->sigset_active = 1; in kvm_vcpu_ioctl_set_sigmask()
4283 vcpu->sigset = *sigset; in kvm_vcpu_ioctl_set_sigmask()
4285 vcpu->sigset_active = 0; in kvm_vcpu_ioctl_set_sigmask()
4292 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_stats_read()
4294 return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header, in kvm_vcpu_stats_read()
4295 &kvm_vcpu_stats_desc[0], &vcpu->stat, in kvm_vcpu_stats_read()
4296 sizeof(vcpu->stat), user_buffer, size, offset); in kvm_vcpu_stats_read()
4301 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_stats_release()
4303 kvm_put_kvm(vcpu->kvm); in kvm_vcpu_stats_release()
4320 snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id); in kvm_vcpu_ioctl_get_stats_fd()
4333 kvm_get_kvm(vcpu->kvm); in kvm_vcpu_ioctl_get_stats_fd()
4347 if (range->flags) in kvm_vcpu_pre_fault_memory()
4348 return -EINVAL; in kvm_vcpu_pre_fault_memory()
4350 if (!PAGE_ALIGNED(range->gpa) || in kvm_vcpu_pre_fault_memory()
4351 !PAGE_ALIGNED(range->size) || in kvm_vcpu_pre_fault_memory()
4352 range->gpa + range->size <= range->gpa) in kvm_vcpu_pre_fault_memory()
4353 return -EINVAL; in kvm_vcpu_pre_fault_memory()
4356 idx = srcu_read_lock(&vcpu->kvm->srcu); in kvm_vcpu_pre_fault_memory()
4358 full_size = range->size; in kvm_vcpu_pre_fault_memory()
4361 r = -EINTR; in kvm_vcpu_pre_fault_memory()
4366 if (WARN_ON_ONCE(r == 0 || r == -EIO)) in kvm_vcpu_pre_fault_memory()
4372 range->size -= r; in kvm_vcpu_pre_fault_memory()
4373 range->gpa += r; in kvm_vcpu_pre_fault_memory()
4375 } while (range->size); in kvm_vcpu_pre_fault_memory()
4377 srcu_read_unlock(&vcpu->kvm->srcu, idx); in kvm_vcpu_pre_fault_memory()
4381 return full_size == range->size ? r : 0; in kvm_vcpu_pre_fault_memory()
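/*
 * Editor's note (not part of kvm_main.c): sketch of how userspace might drive
 * the loop above.  KVM advances range->gpa and shrinks range->size as it makes
 * progress and returns 0 if *any* progress was made, so the caller simply
 * re-issues the ioctl until size reaches zero.  Assumes headers that provide
 * KVM_PRE_FAULT_MEMORY and struct kvm_pre_fault_memory (a recent kernel);
 * treat the struct usage here as illustrative rather than authoritative.
 */
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int prefault_range(int vcpu_fd, __u64 gpa, __u64 size)
{
	struct kvm_pre_fault_memory range;

	memset(&range, 0, sizeof(range));	/* flags and padding must be zero */
	range.gpa = gpa;			/* must be page aligned */
	range.size = size;			/* must be page aligned and non-wrapping */

	while (range.size) {
		if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range) < 0) {
			if (errno == EINTR)
				continue;	/* the signal_pending() path above */
			return -errno;
		}
	}
	return 0;
}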
4387 struct kvm *kvm = vcpu->kvm; in kvm_wait_for_vcpu_online()
4390 * In practice, this happy path will always be taken, as a well-behaved in kvm_wait_for_vcpu_online()
4393 if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus))) in kvm_wait_for_vcpu_online()
4401 if (mutex_lock_killable(&vcpu->mutex)) in kvm_wait_for_vcpu_online()
4402 return -EINTR; in kvm_wait_for_vcpu_online()
4404 mutex_unlock(&vcpu->mutex); in kvm_wait_for_vcpu_online()
4406 if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx))) in kvm_wait_for_vcpu_online()
4407 return -EIO; in kvm_wait_for_vcpu_online()
4415 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_ioctl()
4421 if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) in kvm_vcpu_ioctl()
4422 return -EIO; in kvm_vcpu_ioctl()
4425 return -EINVAL; in kvm_vcpu_ioctl()
4441 if (r != -ENOIOCTLCMD) in kvm_vcpu_ioctl()
4444 if (mutex_lock_killable(&vcpu->mutex)) in kvm_vcpu_ioctl()
4445 return -EINTR; in kvm_vcpu_ioctl()
4449 r = -EINVAL; in kvm_vcpu_ioctl()
4454 * Note, vcpu->pid is primarily protected by vcpu->mutex. The in kvm_vcpu_ioctl()
4456 * read vcpu->pid while this vCPU is in KVM_RUN, e.g. to yield in kvm_vcpu_ioctl()
4459 oldpid = vcpu->pid; in kvm_vcpu_ioctl()
4469 write_lock(&vcpu->pid_lock); in kvm_vcpu_ioctl()
4470 vcpu->pid = newpid; in kvm_vcpu_ioctl()
4471 write_unlock(&vcpu->pid_lock); in kvm_vcpu_ioctl()
4475 vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); in kvm_vcpu_ioctl()
4477 vcpu->wants_to_run = false; in kvm_vcpu_ioctl()
4479 trace_kvm_userspace_exit(vcpu->run->exit_reason, r); in kvm_vcpu_ioctl()
4485 r = -ENOMEM; in kvm_vcpu_ioctl()
4492 r = -EFAULT; in kvm_vcpu_ioctl()
4514 r = -ENOMEM; in kvm_vcpu_ioctl()
4520 r = -EFAULT; in kvm_vcpu_ioctl()
4542 r = -EFAULT; in kvm_vcpu_ioctl()
4551 r = -EFAULT; in kvm_vcpu_ioctl()
4560 r = -EFAULT; in kvm_vcpu_ioctl()
4566 r = -EFAULT; in kvm_vcpu_ioctl()
4575 r = -EFAULT; in kvm_vcpu_ioctl()
4588 r = -EFAULT; in kvm_vcpu_ioctl()
4592 r = -EINVAL; in kvm_vcpu_ioctl()
4595 r = -EFAULT; in kvm_vcpu_ioctl()
4596 if (copy_from_user(&sigset, sigmask_arg->sigset, in kvm_vcpu_ioctl()
4606 r = -ENOMEM; in kvm_vcpu_ioctl()
4612 r = -EFAULT; in kvm_vcpu_ioctl()
4636 r = -EFAULT; in kvm_vcpu_ioctl()
4642 r = -EFAULT; in kvm_vcpu_ioctl()
4650 mutex_unlock(&vcpu->mutex); in kvm_vcpu_ioctl()
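/*
 * Editor's note (not part of kvm_main.c): the KVM_RUN branch above is what a
 * typical userspace vCPU loop exercises.  Minimal sketch; `run` is the mmapped
 * struct kvm_run for this vcpu_fd and the exit handling is intentionally
 * incomplete.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int vcpu_loop(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
			perror("KVM_RUN");
			return -1;
		}

		switch (run->exit_reason) {
		case KVM_EXIT_HLT:
			return 0;
		case KVM_EXIT_IO:
			/* data lives at (char *)run + run->io.data_offset */
			break;
		case KVM_EXIT_MMIO:
			/* run->mmio describes the access to emulate */
			break;
		default:
			fprintf(stderr, "unhandled exit %u\n", run->exit_reason);
			return -1;
		}
	}
}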
4660 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_compat_ioctl()
4664 if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) in kvm_vcpu_compat_ioctl()
4665 return -EIO; in kvm_vcpu_compat_ioctl()
4674 r = -EFAULT; in kvm_vcpu_compat_ioctl()
4678 r = -EINVAL; in kvm_vcpu_compat_ioctl()
4681 r = -EFAULT; in kvm_vcpu_compat_ioctl()
4683 (compat_sigset_t __user *)sigmask_arg->sigset)) in kvm_vcpu_compat_ioctl()
4701 struct kvm_device *dev = filp->private_data; in kvm_device_mmap()
4703 if (dev->ops->mmap) in kvm_device_mmap()
4704 return dev->ops->mmap(dev, vma); in kvm_device_mmap()
4706 return -ENODEV; in kvm_device_mmap()
4717 return -EPERM; in kvm_device_ioctl_attr()
4720 return -EFAULT; in kvm_device_ioctl_attr()
4728 struct kvm_device *dev = filp->private_data; in kvm_device_ioctl()
4730 if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) in kvm_device_ioctl()
4731 return -EIO; in kvm_device_ioctl()
4735 return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); in kvm_device_ioctl()
4737 return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); in kvm_device_ioctl()
4739 return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); in kvm_device_ioctl()
4741 if (dev->ops->ioctl) in kvm_device_ioctl()
4742 return dev->ops->ioctl(dev, ioctl, arg); in kvm_device_ioctl()
4744 return -ENOTTY; in kvm_device_ioctl()
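/*
 * Editor's note (not part of kvm_main.c): kvm_device_ioctl_attr() above copies
 * a struct kvm_device_attr from userspace and hands it to the device ops.  A
 * minimal sketch of the caller side; group/attr values are device specific and
 * the parameters below are placeholders, not real attributes.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_device_attr(int dev_fd, __u32 group, __u64 attr, void *val)
{
	struct kvm_device_attr da;

	memset(&da, 0, sizeof(da));
	da.group = group;
	da.attr = attr;
	da.addr = (__u64)(uintptr_t)val;	/* kernel reads the value via this pointer */

	/* KVM_HAS_DEVICE_ATTR probes support without changing state. */
	if (ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &da) < 0)
		return -1;

	return ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &da);
}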
4750 struct kvm_device *dev = filp->private_data; in kvm_device_release()
4751 struct kvm *kvm = dev->kvm; in kvm_device_release()
4753 if (dev->ops->release) { in kvm_device_release()
4754 mutex_lock(&kvm->lock); in kvm_device_release()
4755 list_del_rcu(&dev->vm_node); in kvm_device_release()
4757 dev->ops->release(dev); in kvm_device_release()
4758 mutex_unlock(&kvm->lock); in kvm_device_release()
4774 if (filp->f_op != &kvm_device_fops) in kvm_device_from_filp()
4777 return filp->private_data; in kvm_device_from_filp()
4790 return -ENOSPC; in kvm_register_device_ops()
4793 return -EEXIST; in kvm_register_device_ops()
4806 struct kvm_create_device *cd) in kvm_ioctl_create_device() argument
4810 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; in kvm_ioctl_create_device() local
4814 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) in kvm_ioctl_create_device()
4815 return -ENODEV; in kvm_ioctl_create_device()
4817 type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); in kvm_ioctl_create_device()
4820 return -ENODEV; in kvm_ioctl_create_device()
4822 if (test) in kvm_ioctl_create_device()
4827 return -ENOMEM; in kvm_ioctl_create_device()
4829 dev->ops = ops; in kvm_ioctl_create_device()
4830 dev->kvm = kvm; in kvm_ioctl_create_device()
4832 mutex_lock(&kvm->lock); in kvm_ioctl_create_device()
4833 ret = ops->create(dev, type); in kvm_ioctl_create_device()
4835 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4839 list_add_rcu(&dev->vm_node, &kvm->devices); in kvm_ioctl_create_device()
4840 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4842 if (ops->init) in kvm_ioctl_create_device()
4843 ops->init(dev); in kvm_ioctl_create_device()
4846 ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); in kvm_ioctl_create_device()
4849 mutex_lock(&kvm->lock); in kvm_ioctl_create_device()
4850 list_del_rcu(&dev->vm_node); in kvm_ioctl_create_device()
4852 if (ops->release) in kvm_ioctl_create_device()
4853 ops->release(dev); in kvm_ioctl_create_device()
4854 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4855 if (ops->destroy) in kvm_ioctl_create_device()
4856 ops->destroy(dev); in kvm_ioctl_create_device()
4860 cd->fd = ret; in kvm_ioctl_create_device()
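/*
 * Editor's note (not part of kvm_main.c): userspace view of
 * kvm_ioctl_create_device().  With KVM_CREATE_DEVICE_TEST set, the ioctl only
 * checks whether the device type is supported and creates nothing.  Sketch
 * only; KVM_DEV_TYPE_VFIO is used purely as a commonly available example type.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_vfio_device(int vm_fd, int *out_dev_fd)
{
	struct kvm_create_device cd;

	memset(&cd, 0, sizeof(cd));
	cd.type = KVM_DEV_TYPE_VFIO;
	cd.flags = KVM_CREATE_DEVICE_TEST;
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;			/* type not supported */

	cd.flags = 0;
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	*out_dev_fd = cd.fd;			/* anon inode fd filled in by the kernel */
	return 0;
}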
4945 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4948 if (!size || (size & (size - 1))) in kvm_vm_ioctl_enable_dirty_log_ring()
4949 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4954 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4958 return -E2BIG; in kvm_vm_ioctl_enable_dirty_log_ring()
4961 if (kvm->dirty_ring_size) in kvm_vm_ioctl_enable_dirty_log_ring()
4962 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4964 mutex_lock(&kvm->lock); in kvm_vm_ioctl_enable_dirty_log_ring()
4966 if (kvm->created_vcpus) { in kvm_vm_ioctl_enable_dirty_log_ring()
4968 r = -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4970 kvm->dirty_ring_size = size; in kvm_vm_ioctl_enable_dirty_log_ring()
4974 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_enable_dirty_log_ring()
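/*
 * Editor's note (not part of kvm_main.c): the "(size & (size - 1))" test above
 * is the usual power-of-two check.  A standalone restatement of the
 * validation, with caller-supplied bounds standing in for the page-size and
 * KVM_DIRTY_RING_MAX_ENTRIES limits used by the real code.
 */
#include <stdbool.h>
#include <stdint.h>

static bool dirty_ring_size_ok(uint32_t size, uint32_t min_bytes, uint32_t max_bytes)
{
	if (!size || (size & (size - 1)))	/* zero, or not a power of two */
		return false;
	return size >= min_bytes && size <= max_bytes;
}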
4984 if (!kvm->dirty_ring_size) in kvm_vm_ioctl_reset_dirty_pages()
4985 return -EINVAL; in kvm_vm_ioctl_reset_dirty_pages()
4987 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_reset_dirty_pages()
4990 r = kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring, &cleared); in kvm_vm_ioctl_reset_dirty_pages()
4995 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_reset_dirty_pages()
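/*
 * Editor's note (not part of kvm_main.c): sketch of the userspace side of the
 * dirty ring that kvm_vm_ioctl_reset_dirty_pages() services.  `ring` is
 * assumed to be one vCPU's mmapped array of struct kvm_dirty_gfn and `nents`
 * its entry count; the KVM_CAP_DIRTY_LOG_RING setup that provides both is
 * elided.  Treat the flag handling as illustrative; the ACQ_REL variant of the
 * capability additionally requires acquire/release ordering on the flags field.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static uint32_t harvest_and_reset(int vm_fd, struct kvm_dirty_gfn *ring,
				  uint32_t nents, uint32_t *cursor)
{
	uint32_t harvested = 0;

	while (ring[*cursor % nents].flags & KVM_DIRTY_GFN_F_DIRTY) {
		struct kvm_dirty_gfn *e = &ring[*cursor % nents];

		/* e->slot / e->offset identify the dirtied guest page. */
		e->flags = KVM_DIRTY_GFN_F_RESET;	/* hand the entry back to KVM */
		(*cursor)++;
		harvested++;
	}

	/* Ask KVM to collect the RESET entries; the ioctl returns pages cleared. */
	ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);
	return harvested;
}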
5006 return -EINVAL; in kvm_vm_ioctl_enable_cap()
5013 lockdep_assert_held(&kvm->slots_lock); in kvm_are_all_memslots_empty()
5027 switch (cap->cap) { in kvm_vm_ioctl_enable_cap_generic()
5032 if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE) in kvm_vm_ioctl_enable_cap_generic()
5035 if (cap->flags || (cap->args[0] & ~allowed_options)) in kvm_vm_ioctl_enable_cap_generic()
5036 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
5037 kvm->manual_dirty_log_protect = cap->args[0]; in kvm_vm_ioctl_enable_cap_generic()
5042 if (cap->flags || cap->args[0] != (unsigned int)cap->args[0]) in kvm_vm_ioctl_enable_cap_generic()
5043 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
5045 kvm->max_halt_poll_ns = cap->args[0]; in kvm_vm_ioctl_enable_cap_generic()
5048 * Ensure kvm->override_halt_poll_ns does not become visible in kvm_vm_ioctl_enable_cap_generic()
5049 * before kvm->max_halt_poll_ns. in kvm_vm_ioctl_enable_cap_generic()
5054 kvm->override_halt_poll_ns = true; in kvm_vm_ioctl_enable_cap_generic()
5060 if (!kvm_vm_ioctl_check_extension_generic(kvm, cap->cap)) in kvm_vm_ioctl_enable_cap_generic()
5061 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
5063 return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); in kvm_vm_ioctl_enable_cap_generic()
5065 int r = -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
5068 !kvm->dirty_ring_size || cap->flags) in kvm_vm_ioctl_enable_cap_generic()
5071 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_enable_cap_generic()
5079 kvm->dirty_ring_with_bitmap = true; in kvm_vm_ioctl_enable_cap_generic()
5083 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_enable_cap_generic()
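/*
 * Editor's note (not part of kvm_main.c): how the generic-cap path above is
 * reached from userspace.  Minimal sketch enabling the dirty ring; the size
 * passed in args[0] must satisfy the power-of-two check shown earlier, and the
 * capability must be enabled before any vCPU is created (see the
 * kvm->created_vcpus check above).
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_dirty_ring(int vm_fd, __u64 bytes)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_DIRTY_LOG_RING;
	cap.args[0] = bytes;		/* per-vCPU ring size in bytes */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}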
5095 struct kvm *kvm = file->private_data; in kvm_vm_stats_read()
5097 return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header, in kvm_vm_stats_read()
5098 &kvm_vm_stats_desc[0], &kvm->stat, in kvm_vm_stats_read()
5099 sizeof(kvm->stat), user_buffer, size, offset); in kvm_vm_stats_read()
5104 struct kvm *kvm = file->private_data; in kvm_vm_stats_release()
5126 file = anon_inode_getfile_fmode("kvm-vm-stats", in kvm_vm_ioctl_get_stats_fd()
5150 struct kvm *kvm = filp->private_data; in kvm_vm_ioctl()
5154 if (kvm->mm != current->mm || kvm->vm_dead) in kvm_vm_ioctl()
5155 return -EIO; in kvm_vm_ioctl()
5163 r = -EFAULT; in kvm_vm_ioctl()
5192 r = -EFAULT; in kvm_vm_ioctl()
5196 r = -EINVAL; in kvm_vm_ioctl()
5207 r = -EFAULT; in kvm_vm_ioctl()
5217 r = -EFAULT; in kvm_vm_ioctl()
5228 r = -EFAULT; in kvm_vm_ioctl()
5237 r = -EFAULT; in kvm_vm_ioctl()
5247 r = -EFAULT; in kvm_vm_ioctl()
5256 r = -EFAULT; in kvm_vm_ioctl()
5266 r = -EFAULT; in kvm_vm_ioctl()
5278 r = -EFAULT; in kvm_vm_ioctl()
5287 r = -EFAULT; in kvm_vm_ioctl()
5303 r = -EFAULT; in kvm_vm_ioctl()
5306 r = -EINVAL; in kvm_vm_ioctl()
5315 entries = vmemdup_array_user(urouting->entries, in kvm_vm_ioctl()
5332 r = -EFAULT; in kvm_vm_ioctl()
5341 struct kvm_create_device cd; in kvm_vm_ioctl() local
5343 r = -EFAULT; in kvm_vm_ioctl()
5344 if (copy_from_user(&cd, argp, sizeof(cd))) in kvm_vm_ioctl()
5347 r = kvm_ioctl_create_device(kvm, &cd); in kvm_vm_ioctl()
5351 r = -EFAULT; in kvm_vm_ioctl()
5352 if (copy_to_user(argp, &cd, sizeof(cd))) in kvm_vm_ioctl()
5371 r = -EFAULT; in kvm_vm_ioctl()
5409 return -ENOTTY; in kvm_arch_vm_compat_ioctl()
5415 struct kvm *kvm = filp->private_data; in kvm_vm_compat_ioctl()
5418 if (kvm->mm != current->mm || kvm->vm_dead) in kvm_vm_compat_ioctl()
5419 return -EIO; in kvm_vm_compat_ioctl()
5422 if (r != -ENOTTY) in kvm_vm_compat_ioctl()
5433 return -EFAULT; in kvm_vm_compat_ioctl()
5450 return -EFAULT; in kvm_vm_compat_ioctl()
5475 return file && file->f_op == &kvm_vm_fops; in file_is_kvm()
5498 file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); in kvm_dev_ioctl_create_vm()
5505 * Don't call kvm_put_kvm anymore at this point; file->f_op is in kvm_dev_ioctl_create_vm()
5506 * already set, with ->release() being kvm_vm_release(). In error in kvm_dev_ioctl_create_vm()
5525 int r = -EINVAL; in kvm_dev_ioctl()
5599 return -EIO; in kvm_enable_virtualization_cpu()
5701 * If userspace initiated a forced reboot, e.g. reboot -f, then it's in kvm_enable_virtualization()
5702 * possible for an in-flight operation to enable virtualization after in kvm_enable_virtualization()
5703 * syscore_shutdown() is called, i.e. without kvm_shutdown() being in kvm_enable_virtualization()
5712 r = -EBUSY; in kvm_enable_virtualization()
5723 --kvm_usage_count; in kvm_enable_virtualization()
5732 if (--kvm_usage_count) in kvm_disable_virtualization()
5768 if (dev->ops->destructor) in kvm_iodevice_destructor()
5769 dev->ops->destructor(dev); in kvm_iodevice_destructor()
5776 for (i = 0; i < bus->dev_count; i++) { in kvm_io_bus_destroy()
5777 struct kvm_io_device *pos = bus->range[i].dev; in kvm_io_bus_destroy()
5787 gpa_t addr1 = r1->addr; in kvm_io_bus_cmp()
5788 gpa_t addr2 = r2->addr; in kvm_io_bus_cmp()
5791 return -1; in kvm_io_bus_cmp()
5793 /* If r2->len == 0, match the exact address. If r2->len != 0, in kvm_io_bus_cmp()
5798 if (r2->len) { in kvm_io_bus_cmp()
5799 addr1 += r1->len; in kvm_io_bus_cmp()
5800 addr2 += r2->len; in kvm_io_bus_cmp()
5825 range = bsearch(&key, bus->range, bus->dev_count, in kvm_io_bus_get_first_dev()
5828 return -ENOENT; in kvm_io_bus_get_first_dev()
5830 off = range - bus->range; in kvm_io_bus_get_first_dev()
5832 while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0) in kvm_io_bus_get_first_dev()
5833 off--; in kvm_io_bus_get_first_dev()
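/*
 * Editor's note (not part of kvm_main.c): the lookup above is plain bsearch()
 * plus a backwards walk, because several registered ranges can compare equal
 * to the key and the bus must start from the first of them.  A self-contained
 * restatement over an int array, with integer equality standing in for
 * kvm_io_bus_cmp() == 0.
 */
#include <stdlib.h>

static int cmp_int(const void *a, const void *b)
{
	int x = *(const int *)a, y = *(const int *)b;

	return (x > y) - (x < y);
}

/* Return the index of the first element equal to key, or -1 if absent. */
static long first_match(const int *sorted, size_t n, int key)
{
	const int *hit = bsearch(&key, sorted, n, sizeof(*sorted), cmp_int);
	long off;

	if (!hit)
		return -1;

	off = hit - sorted;
	while (off > 0 && sorted[off - 1] == key)
		off--;			/* bsearch may land on any of the equal entries */
	return off;
}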
5843 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); in __kvm_io_bus_write()
5845 return -EOPNOTSUPP; in __kvm_io_bus_write()
5847 while (idx < bus->dev_count && in __kvm_io_bus_write()
5848 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { in __kvm_io_bus_write()
5849 if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, in __kvm_io_bus_write()
5850 range->len, val)) in __kvm_io_bus_write()
5855 return -EOPNOTSUPP; in __kvm_io_bus_write()
5867 return srcu_dereference(kvm->buses[idx], &kvm->srcu); in kvm_get_bus_srcu()
5882 bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); in kvm_io_bus_write()
5884 return -ENOMEM; in kvm_io_bus_write()
5901 bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); in kvm_io_bus_write_cookie()
5903 return -ENOMEM; in kvm_io_bus_write_cookie()
5906 if ((cookie >= 0) && (cookie < bus->dev_count) && in kvm_io_bus_write_cookie()
5907 (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) in kvm_io_bus_write_cookie()
5908 if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, in kvm_io_bus_write_cookie()
5924 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); in __kvm_io_bus_read()
5926 return -EOPNOTSUPP; in __kvm_io_bus_read()
5928 while (idx < bus->dev_count && in __kvm_io_bus_read()
5929 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { in __kvm_io_bus_read()
5930 if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, in __kvm_io_bus_read()
5931 range->len, val)) in __kvm_io_bus_read()
5936 return -EOPNOTSUPP; in __kvm_io_bus_read()
5951 bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); in kvm_io_bus_read()
5953 return -ENOMEM; in kvm_io_bus_read()
5973 lockdep_assert_held(&kvm->slots_lock); in kvm_io_bus_register_dev()
5977 return -ENOMEM; in kvm_io_bus_register_dev()
5980 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) in kvm_io_bus_register_dev()
5981 return -ENOSPC; in kvm_io_bus_register_dev()
5983 new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), in kvm_io_bus_register_dev()
5986 return -ENOMEM; in kvm_io_bus_register_dev()
5994 for (i = 0; i < bus->dev_count; i++) in kvm_io_bus_register_dev()
5995 if (kvm_io_bus_cmp(&bus->range[i], &range) > 0) in kvm_io_bus_register_dev()
5999 new_bus->dev_count++; in kvm_io_bus_register_dev()
6000 new_bus->range[i] = range; in kvm_io_bus_register_dev()
6001 memcpy(new_bus->range + i + 1, bus->range + i, in kvm_io_bus_register_dev()
6002 (bus->dev_count - i) * sizeof(struct kvm_io_range)); in kvm_io_bus_register_dev()
6003 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); in kvm_io_bus_register_dev()
6004 call_srcu(&kvm->srcu, &bus->rcu, __free_bus); in kvm_io_bus_register_dev()
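/*
 * Editor's note (not part of kvm_main.c): the registration path above never
 * modifies the live bus; it builds a new, larger array with the entry spliced
 * in at its sorted position and only then publishes it (rcu_assign_pointer
 * plus a deferred free of the old copy).  A userspace restatement of just the
 * copy-and-splice step, using ints in place of struct kvm_io_range.
 */
#include <stdlib.h>
#include <string.h>

/* Returns a newly allocated sorted array of n + 1 elements, or NULL. */
static int *insert_sorted_copy(const int *old, size_t n, int val)
{
	int *dst = malloc((n + 1) * sizeof(*dst));
	size_t i;

	if (!dst)
		return NULL;

	for (i = 0; i < n && old[i] <= val; i++)
		;				/* find the insertion point */

	memcpy(dst, old, i * sizeof(*dst));
	dst[i] = val;
	memcpy(dst + i + 1, old + i, (n - i) * sizeof(*dst));
	return dst;				/* caller publishes dst and frees old */
}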
6015 lockdep_assert_held(&kvm->slots_lock); in kvm_io_bus_unregister_dev()
6021 for (i = 0; i < bus->dev_count; i++) { in kvm_io_bus_unregister_dev()
6022 if (bus->range[i].dev == dev) { in kvm_io_bus_unregister_dev()
6027 if (i == bus->dev_count) in kvm_io_bus_unregister_dev()
6030 new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), in kvm_io_bus_unregister_dev()
6034 new_bus->dev_count--; in kvm_io_bus_unregister_dev()
6035 memcpy(new_bus->range + i, bus->range + i + 1, in kvm_io_bus_unregister_dev()
6036 flex_array_size(new_bus, range, new_bus->dev_count - i)); in kvm_io_bus_unregister_dev()
6039 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); in kvm_io_bus_unregister_dev()
6040 synchronize_srcu_expedited(&kvm->srcu); in kvm_io_bus_unregister_dev()
6049 return -ENOMEM; in kvm_io_bus_unregister_dev()
6064 srcu_idx = srcu_read_lock(&kvm->srcu); in kvm_io_bus_get_dev()
6074 iodev = bus->range[dev_idx].dev; in kvm_io_bus_get_dev()
6077 srcu_read_unlock(&kvm->srcu, srcu_idx); in kvm_io_bus_get_dev()
6088 struct kvm_stat_data *stat_data = inode->i_private; in kvm_debugfs_open()
6095 if (!kvm_get_kvm_safe(stat_data->kvm)) in kvm_debugfs_open()
6096 return -ENOENT; in kvm_debugfs_open()
6099 kvm_stats_debugfs_mode(stat_data->desc) & 0222 in kvm_debugfs_open()
6102 kvm_put_kvm(stat_data->kvm); in kvm_debugfs_open()
6109 struct kvm_stat_data *stat_data = inode->i_private; in kvm_debugfs_release()
6112 kvm_put_kvm(stat_data->kvm); in kvm_debugfs_release()
6119 *val = *(u64 *)((void *)(&kvm->stat) + offset); in kvm_get_stat_per_vm()
6126 *(u64 *)((void *)(&kvm->stat) + offset) = 0; in kvm_clear_stat_per_vm()
6139 *val += *(u64 *)((void *)(&vcpu->stat) + offset); in kvm_get_stat_per_vcpu()
6150 *(u64 *)((void *)(&vcpu->stat) + offset) = 0; in kvm_clear_stat_per_vcpu()
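/*
 * Editor's note (not part of kvm_main.c): the stat helpers above address a
 * counter as "base of the stats struct + byte offset", with the offset coming
 * from the stats descriptor table.  A self-contained illustration of the same
 * offsetof() trick on a made-up struct.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_stats {
	uint64_t exits;
	uint64_t irq_injections;
};

static uint64_t read_stat(const struct demo_stats *s, size_t offset)
{
	return *(const uint64_t *)((const char *)s + offset);
}

int main(void)
{
	struct demo_stats s = { .exits = 42, .irq_injections = 7 };

	printf("%llu\n",
	       (unsigned long long)read_stat(&s, offsetof(struct demo_stats, irq_injections)));
	return 0;
}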
6157 int r = -EFAULT; in kvm_stat_data_get()
6160 switch (stat_data->kind) { in kvm_stat_data_get()
6162 r = kvm_get_stat_per_vm(stat_data->kvm, in kvm_stat_data_get()
6163 stat_data->desc->desc.offset, val); in kvm_stat_data_get()
6166 r = kvm_get_stat_per_vcpu(stat_data->kvm, in kvm_stat_data_get()
6167 stat_data->desc->desc.offset, val); in kvm_stat_data_get()
6176 int r = -EFAULT; in kvm_stat_data_clear()
6180 return -EINVAL; in kvm_stat_data_clear()
6182 switch (stat_data->kind) { in kvm_stat_data_clear()
6184 r = kvm_clear_stat_per_vm(stat_data->kvm, in kvm_stat_data_clear()
6185 stat_data->desc->desc.offset); in kvm_stat_data_clear()
6188 r = kvm_clear_stat_per_vcpu(stat_data->kvm, in kvm_stat_data_clear()
6189 stat_data->desc->desc.offset); in kvm_stat_data_clear()
6233 return -EINVAL; in vm_stat_clear()
6269 return -EINVAL; in vcpu_stat_clear()
6297 kvm_active_vms--; in kvm_uevent_notify_change()
6312 kvm->userspace_pid = task_pid_nr(current); in kvm_uevent_notify_change()
6316 add_uevent_var(env, "PID=%d", kvm->userspace_pid); in kvm_uevent_notify_change()
6318 if (!IS_ERR(kvm->debugfs_dentry)) { in kvm_uevent_notify_change()
6322 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); in kvm_uevent_notify_change()
6329 env->envp[env->envp_idx++] = NULL; in kvm_uevent_notify_change()
6330 kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); in kvm_uevent_notify_change()
6348 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_init_debug()
6350 (void *)(long)pdesc->desc.offset, fops); in kvm_init_debug()
6359 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_init_debug()
6361 (void *)(long)pdesc->desc.offset, fops); in kvm_init_debug()
6375 WRITE_ONCE(vcpu->preempted, false); in kvm_sched_in()
6376 WRITE_ONCE(vcpu->ready, false); in kvm_sched_in()
6381 WRITE_ONCE(vcpu->scheduled_out, false); in kvm_sched_in()
6389 WRITE_ONCE(vcpu->scheduled_out, true); in kvm_sched_out()
6391 if (task_is_runnable(current) && vcpu->wants_to_run) { in kvm_sched_out()
6392 WRITE_ONCE(vcpu->preempted, true); in kvm_sched_out()
6393 WRITE_ONCE(vcpu->ready, true); in kvm_sched_out()
6400 * kvm_get_running_vcpu - get the vcpu running on the current CPU.
6402 * We can disable preemption locally around accessing the per-CPU variable,
6405 * the per-CPU value later will give us the same value as we update the
6406 * per-CPU variable in the preempt notifier handlers.
6421 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
6485 - offsetof(struct kvm_vcpu, arch), in kvm_init()
6488 return -ENOMEM; in kvm_init()
6493 r = -ENOMEM; in kvm_init()