Lines Matching "bus-range"

1 // SPDX-License-Identifier: GPL-2.0-only
3 * Kernel-based Virtual Machine (KVM) Hypervisor
74 MODULE_DESCRIPTION("Kernel-based Virtual Machine (KVM) Hypervisor");
82 /* Default doubles per-vcpu halt_poll_ns. */
92 /* Default halves per-vcpu halt_poll_ns. */
107 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
132 * - Prevent a compat task from opening /dev/kvm
133 * - If the open has been done by a 64bit task, and the KVM fd
137 unsigned long arg) { return -EINVAL; } in kvm_no_compat_ioctl()
141 return is_compat_task() ? -ENODEV : 0; in kvm_no_compat_open()
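
The two compat stubs shown in fragments above are tiny; filled out, they plausibly read as the sketch below. The signatures follow the standard file_operations ioctl/open prototypes; the surrounding #ifdef and the wiring into KVM's file_operations are not shown in this listing and are assumed.

    /*
     * Sketch of the "double line of defense" for architectures without a
     * compat ioctl infrastructure, assembled from the fragments above.
     */
    static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
                                    unsigned long arg)
    {
            /* Any ioctl issued through a compat path simply fails. */
            return -EINVAL;
    }

    static int kvm_no_compat_open(struct inode *inode, struct file *file)
    {
            /* Refuse the open outright when the caller is a compat task. */
            return is_compat_task() ? -ENODEV : 0;
    }
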
149 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
171 preempt_notifier_register(&vcpu->preempt_notifier); in vcpu_load()
181 preempt_notifier_unregister(&vcpu->preempt_notifier); in vcpu_put()
240 cpu = READ_ONCE(vcpu->cpu); in kvm_make_vcpu_request()
241 if (cpu != -1 && cpu != current_cpu) in kvm_make_vcpu_request()
297 ++kvm->stat.generic.remote_tlb_flush_requests; in kvm_flush_remote_tlbs()
301 * mode. Pairs with a memory barrier in arch-specific code. in kvm_flush_remote_tlbs()
302 * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest in kvm_flush_remote_tlbs()
304 * - powerpc: smp_mb in kvmppc_prepare_to_enter. in kvm_flush_remote_tlbs()
307 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that in kvm_flush_remote_tlbs()
312 ++kvm->stat.generic.remote_tlb_flush; in kvm_flush_remote_tlbs()
322 * Fall back to flushing the entire TLB if the architecture's range-based in kvm_flush_remote_tlbs_range()
339 lockdep_assert_held(&kvm->slots_lock); in kvm_flush_remote_tlbs_memslot()
340 kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); in kvm_flush_remote_tlbs_memslot()
355 gfp_flags |= mc->gfp_zero; in mmu_memory_cache_alloc_obj()
357 if (mc->kmem_cache) in mmu_memory_cache_alloc_obj()
358 return kmem_cache_alloc(mc->kmem_cache, gfp_flags); in mmu_memory_cache_alloc_obj()
361 if (page && mc->init_value) in mmu_memory_cache_alloc_obj()
362 memset64(page, mc->init_value, PAGE_SIZE / sizeof(u64)); in mmu_memory_cache_alloc_obj()
368 gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT; in __kvm_mmu_topup_memory_cache()
371 if (mc->nobjs >= min) in __kvm_mmu_topup_memory_cache()
374 if (unlikely(!mc->objects)) { in __kvm_mmu_topup_memory_cache()
376 return -EIO; in __kvm_mmu_topup_memory_cache()
382 if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero))) in __kvm_mmu_topup_memory_cache()
383 return -EIO; in __kvm_mmu_topup_memory_cache()
385 mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp); in __kvm_mmu_topup_memory_cache()
386 if (!mc->objects) in __kvm_mmu_topup_memory_cache()
387 return -ENOMEM; in __kvm_mmu_topup_memory_cache()
389 mc->capacity = capacity; in __kvm_mmu_topup_memory_cache()
393 if (WARN_ON_ONCE(mc->capacity != capacity)) in __kvm_mmu_topup_memory_cache()
394 return -EIO; in __kvm_mmu_topup_memory_cache()
396 while (mc->nobjs < mc->capacity) { in __kvm_mmu_topup_memory_cache()
399 return mc->nobjs >= min ? 0 : -ENOMEM; in __kvm_mmu_topup_memory_cache()
400 mc->objects[mc->nobjs++] = obj; in __kvm_mmu_topup_memory_cache()
412 return mc->nobjs; in kvm_mmu_memory_cache_nr_free_objects()
417 while (mc->nobjs) { in kvm_mmu_free_memory_cache()
418 if (mc->kmem_cache) in kvm_mmu_free_memory_cache()
419 kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]); in kvm_mmu_free_memory_cache()
421 free_page((unsigned long)mc->objects[--mc->nobjs]); in kvm_mmu_free_memory_cache()
424 kvfree(mc->objects); in kvm_mmu_free_memory_cache()
426 mc->objects = NULL; in kvm_mmu_free_memory_cache()
427 mc->capacity = 0; in kvm_mmu_free_memory_cache()
434 if (WARN_ON(!mc->nobjs)) in kvm_mmu_memory_cache_alloc()
437 p = mc->objects[--mc->nobjs]; in kvm_mmu_memory_cache_alloc()
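
The cache helpers above implement a topup-then-allocate pattern: objects are reserved while sleeping is still allowed, and handed out later from contexts that must not sleep or fail. A caller-side sketch, assuming kernel context; the function name, the minimum of 4 objects, and the use of __GFP_ZERO are illustrative, not taken from this listing.

    /* Illustrative only: how a user of the MMU memory cache typically looks. */
    static int example_topup_and_alloc(struct kvm_mmu_memory_cache *cache)
    {
            void *obj;
            int r;

            cache->gfp_zero = __GFP_ZERO;   /* zero-fill backing pages */

            /* Fill the cache up front, in a context that may sleep. */
            r = kvm_mmu_topup_memory_cache(cache, 4);
            if (r)
                    return r;

            /* Later, e.g. with a spinlock held, allocation cannot sleep. */
            obj = kvm_mmu_memory_cache_alloc(cache);
            (void)obj;

            /* On teardown, release whatever was not consumed. */
            kvm_mmu_free_memory_cache(cache);
            return 0;
    }
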
445 mutex_init(&vcpu->mutex); in kvm_vcpu_init()
446 vcpu->cpu = -1; in kvm_vcpu_init()
447 vcpu->kvm = kvm; in kvm_vcpu_init()
448 vcpu->vcpu_id = id; in kvm_vcpu_init()
449 vcpu->pid = NULL; in kvm_vcpu_init()
450 rwlock_init(&vcpu->pid_lock); in kvm_vcpu_init()
452 rcuwait_init(&vcpu->wait); in kvm_vcpu_init()
458 vcpu->preempted = false; in kvm_vcpu_init()
459 vcpu->ready = false; in kvm_vcpu_init()
460 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); in kvm_vcpu_init()
461 vcpu->last_used_slot = NULL; in kvm_vcpu_init()
464 snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", in kvm_vcpu_init()
471 kvm_dirty_ring_free(&vcpu->dirty_ring); in kvm_vcpu_destroy()
475 * the vcpu->pid pointer, and at destruction time all file descriptors in kvm_vcpu_destroy()
478 put_pid(vcpu->pid); in kvm_vcpu_destroy()
480 free_page((unsigned long)vcpu->run); in kvm_vcpu_destroy()
491 xa_erase(&kvm->vcpu_array, i); in kvm_destroy_vcpus()
494 atomic_set(&kvm->online_vcpus, 0); in kvm_destroy_vcpus()
504 typedef bool (*gfn_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
510 * 64-bit addresses, as KVM notifiers can operate on host virtual
511 * addresses (unsigned long) and guest physical addresses (64-bit).
523 * The inner-most helper returns a tuple containing the return value from the
524 * arch- and action-specific handler, plus a flag indicating whether or not at
528 * return from arch code as a bool, outer helpers will cast it to an int. :-(
538 * function will have a non-zero address, and so it will generate code to
548 /* Iterate over each memslot intersecting [start, last] (inclusive) range */
550 for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
555 const struct kvm_mmu_notifier_range *range) in __kvm_handle_hva_range() argument
566 if (WARN_ON_ONCE(range->end <= range->start)) in __kvm_handle_hva_range()
570 if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) && in __kvm_handle_hva_range()
571 IS_KVM_NULL_FN(range->handler))) in __kvm_handle_hva_range()
574 idx = srcu_read_lock(&kvm->srcu); in __kvm_handle_hva_range()
581 range->start, range->end - 1) { in __kvm_handle_hva_range()
584 slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]); in __kvm_handle_hva_range()
585 hva_start = max_t(unsigned long, range->start, slot->userspace_addr); in __kvm_handle_hva_range()
586 hva_end = min_t(unsigned long, range->end, in __kvm_handle_hva_range()
587 slot->userspace_addr + (slot->npages << PAGE_SHIFT)); in __kvm_handle_hva_range()
591 * range is covered by zero or one memslots, don't in __kvm_handle_hva_range()
595 gfn_range.arg = range->arg; in __kvm_handle_hva_range()
596 gfn_range.may_block = range->may_block; in __kvm_handle_hva_range()
598 * HVA-based notifications aren't relevant to private in __kvm_handle_hva_range()
605 * {gfn_start, gfn_start+1, ..., gfn_end-1}. in __kvm_handle_hva_range()
608 gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot); in __kvm_handle_hva_range()
614 if (!IS_KVM_NULL_FN(range->on_lock)) in __kvm_handle_hva_range()
615 range->on_lock(kvm); in __kvm_handle_hva_range()
617 if (IS_KVM_NULL_FN(range->handler)) in __kvm_handle_hva_range()
620 r.ret |= range->handler(kvm, &gfn_range); in __kvm_handle_hva_range()
624 if (range->flush_on_ret && r.ret) in __kvm_handle_hva_range()
631 srcu_read_unlock(&kvm->srcu, idx); in __kvm_handle_hva_range()
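
The clamped hva range above is converted to a gfn range with hva_to_gfn_memslot(). Given the fields used here (userspace_addr, base_gfn), that helper plausibly reduces to the arithmetic below; treat it as a sketch rather than a verbatim copy of the header.

    /* Convert a host virtual address inside @slot to a guest frame number. */
    static inline gfn_t hva_to_gfn_memslot(unsigned long hva,
                                           struct kvm_memory_slot *slot)
    {
            gfn_t gfn = (hva - slot->userspace_addr) >> PAGE_SHIFT;

            return slot->base_gfn + gfn;
    }

This is also why the caller rounds hva_end up by PAGE_SIZE - 1 before converting: gfn_range.end then acts as an exclusive bound that still covers a partially overlapped final page.
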
643 const struct kvm_mmu_notifier_range range = { in kvm_handle_hva_range() local
652 return __kvm_handle_hva_range(kvm, &range).ret; in kvm_handle_hva_range()
665 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_begin()
671 kvm->mmu_invalidate_in_progress++; in kvm_mmu_invalidate_begin()
673 if (likely(kvm->mmu_invalidate_in_progress == 1)) { in kvm_mmu_invalidate_begin()
674 kvm->mmu_invalidate_range_start = INVALID_GPA; in kvm_mmu_invalidate_begin()
675 kvm->mmu_invalidate_range_end = INVALID_GPA; in kvm_mmu_invalidate_begin()
681 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_range_add()
683 WARN_ON_ONCE(!kvm->mmu_invalidate_in_progress); in kvm_mmu_invalidate_range_add()
685 if (likely(kvm->mmu_invalidate_range_start == INVALID_GPA)) { in kvm_mmu_invalidate_range_add()
686 kvm->mmu_invalidate_range_start = start; in kvm_mmu_invalidate_range_add()
687 kvm->mmu_invalidate_range_end = end; in kvm_mmu_invalidate_range_add()
691 * returns. Keep things simple and just find the minimal range in kvm_mmu_invalidate_range_add()
693 * enough information to subtract a range after its invalidate in kvm_mmu_invalidate_range_add()
698 kvm->mmu_invalidate_range_start = in kvm_mmu_invalidate_range_add()
699 min(kvm->mmu_invalidate_range_start, start); in kvm_mmu_invalidate_range_add()
700 kvm->mmu_invalidate_range_end = in kvm_mmu_invalidate_range_add()
701 max(kvm->mmu_invalidate_range_end, end); in kvm_mmu_invalidate_range_add()
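
The else-branch above collapses every range added during one invalidation into a single [start, end) window via min()/max(). A worked example with made-up GPAs:

    /*
     * Illustrative only: two adds under the same begin()/end() pair.
     *
     *   kvm_mmu_invalidate_range_add(kvm, 0x2000, 0x4000);
     *       -> range_start = 0x2000, range_end = 0x4000      (first add)
     *   kvm_mmu_invalidate_range_add(kvm, 0x1000, 0x3000);
     *       -> range_start = min(0x2000, 0x1000) = 0x1000
     *          range_end   = max(0x4000, 0x3000) = 0x4000
     *
     * The tracked window is now [0x1000, 0x4000), a superset of both
     * ranges, which is enough for consumers that only need to know
     * whether a given gfn might be under invalidation.
     */
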
705 bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) in kvm_mmu_unmap_gfn_range() argument
707 kvm_mmu_invalidate_range_add(kvm, range->start, range->end); in kvm_mmu_unmap_gfn_range()
708 return kvm_unmap_gfn_range(kvm, range); in kvm_mmu_unmap_gfn_range()
712 const struct mmu_notifier_range *range) in kvm_mmu_notifier_invalidate_range_start() argument
716 .start = range->start, in kvm_mmu_notifier_invalidate_range_start()
717 .end = range->end, in kvm_mmu_notifier_invalidate_range_start()
721 .may_block = mmu_notifier_range_blockable(range), in kvm_mmu_notifier_invalidate_range_start()
724 trace_kvm_unmap_hva_range(range->start, range->end); in kvm_mmu_notifier_invalidate_range_start()
734 spin_lock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_start()
735 kvm->mn_active_invalidate_count++; in kvm_mmu_notifier_invalidate_range_start()
736 spin_unlock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_start()
748 gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end); in kvm_mmu_notifier_invalidate_range_start()
763 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_end()
770 kvm->mmu_invalidate_seq++; in kvm_mmu_invalidate_end()
777 kvm->mmu_invalidate_in_progress--; in kvm_mmu_invalidate_end()
778 KVM_BUG_ON(kvm->mmu_invalidate_in_progress < 0, kvm); in kvm_mmu_invalidate_end()
781 * Assert that at least one range was added between start() and end(). in kvm_mmu_invalidate_end()
782 * Not adding a range isn't fatal, but it is a KVM bug. in kvm_mmu_invalidate_end()
784 WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA); in kvm_mmu_invalidate_end()
788 const struct mmu_notifier_range *range) in kvm_mmu_notifier_invalidate_range_end() argument
792 .start = range->start, in kvm_mmu_notifier_invalidate_range_end()
793 .end = range->end, in kvm_mmu_notifier_invalidate_range_end()
797 .may_block = mmu_notifier_range_blockable(range), in kvm_mmu_notifier_invalidate_range_end()
804 spin_lock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_end()
805 if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count)) in kvm_mmu_notifier_invalidate_range_end()
806 --kvm->mn_active_invalidate_count; in kvm_mmu_notifier_invalidate_range_end()
807 wake = !kvm->mn_active_invalidate_count; in kvm_mmu_notifier_invalidate_range_end()
808 spin_unlock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_end()
815 rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); in kvm_mmu_notifier_invalidate_range_end()
838 * affect performance on pre-Haswell Intel EPT, where there is in kvm_mmu_notifier_clear_young()
868 idx = srcu_read_lock(&kvm->srcu); in kvm_mmu_notifier_release()
870 srcu_read_unlock(&kvm->srcu, idx); in kvm_mmu_notifier_release()
884 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; in kvm_init_mmu_notifier()
885 return mmu_notifier_register(&kvm->mmu_notifier, current->mm); in kvm_init_mmu_notifier()
909 kvm->pm_notifier.notifier_call = kvm_pm_notifier_call; in kvm_init_pm_notifier()
911 kvm->pm_notifier.priority = INT_MAX; in kvm_init_pm_notifier()
912 register_pm_notifier(&kvm->pm_notifier); in kvm_init_pm_notifier()
917 unregister_pm_notifier(&kvm->pm_notifier); in kvm_destroy_pm_notifier()
931 if (!memslot->dirty_bitmap) in kvm_destroy_dirty_bitmap()
934 vfree(memslot->dirty_bitmap); in kvm_destroy_dirty_bitmap()
935 memslot->dirty_bitmap = NULL; in kvm_destroy_dirty_bitmap()
941 if (slot->flags & KVM_MEM_GUEST_MEMFD) in kvm_free_memslot()
963 if (!slots->node_idx) in kvm_free_memslots()
966 hash_for_each_safe(slots->id_hash, bkt, idnode, memslot, id_node[1]) in kvm_free_memslots()
972 switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) { in kvm_stats_debugfs_mode()
989 if (IS_ERR(kvm->debugfs_dentry)) in kvm_destroy_vm_debugfs()
992 debugfs_remove_recursive(kvm->debugfs_dentry); in kvm_destroy_vm_debugfs()
994 if (kvm->debugfs_stat_data) { in kvm_destroy_vm_debugfs()
996 kfree(kvm->debugfs_stat_data[i]); in kvm_destroy_vm_debugfs()
997 kfree(kvm->debugfs_stat_data); in kvm_destroy_vm_debugfs()
1008 int i, ret = -ENOMEM; in kvm_create_vm_debugfs()
1015 snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname); in kvm_create_vm_debugfs()
1029 kvm->debugfs_dentry = dent; in kvm_create_vm_debugfs()
1030 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, in kvm_create_vm_debugfs()
1031 sizeof(*kvm->debugfs_stat_data), in kvm_create_vm_debugfs()
1033 if (!kvm->debugfs_stat_data) in kvm_create_vm_debugfs()
1042 stat_data->kvm = kvm; in kvm_create_vm_debugfs()
1043 stat_data->desc = pdesc; in kvm_create_vm_debugfs()
1044 stat_data->kind = KVM_STAT_VM; in kvm_create_vm_debugfs()
1045 kvm->debugfs_stat_data[i] = stat_data; in kvm_create_vm_debugfs()
1046 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_create_vm_debugfs()
1047 kvm->debugfs_dentry, stat_data, in kvm_create_vm_debugfs()
1057 stat_data->kvm = kvm; in kvm_create_vm_debugfs()
1058 stat_data->desc = pdesc; in kvm_create_vm_debugfs()
1059 stat_data->kind = KVM_STAT_VCPU; in kvm_create_vm_debugfs()
1060 kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data; in kvm_create_vm_debugfs()
1061 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_create_vm_debugfs()
1062 kvm->debugfs_dentry, stat_data, in kvm_create_vm_debugfs()
1082 * Called after the per-VM debugfs is created. When called, kvm->debugfs_dentry should
1083 * be set up already, so we can create arch-specific debugfs entries under it.
1085 * a per-arch destroy interface is not needed.
1098 return ERR_PTR(-ENOMEM); in kvm_create_vm()
1101 mmgrab(current->mm); in kvm_create_vm()
1102 kvm->mm = current->mm; in kvm_create_vm()
1104 mutex_init(&kvm->lock); in kvm_create_vm()
1105 mutex_init(&kvm->irq_lock); in kvm_create_vm()
1106 mutex_init(&kvm->slots_lock); in kvm_create_vm()
1107 mutex_init(&kvm->slots_arch_lock); in kvm_create_vm()
1108 spin_lock_init(&kvm->mn_invalidate_lock); in kvm_create_vm()
1109 rcuwait_init(&kvm->mn_memslots_update_rcuwait); in kvm_create_vm()
1110 xa_init(&kvm->vcpu_array); in kvm_create_vm()
1112 xa_init(&kvm->mem_attr_array); in kvm_create_vm()
1115 INIT_LIST_HEAD(&kvm->gpc_list); in kvm_create_vm()
1116 spin_lock_init(&kvm->gpc_lock); in kvm_create_vm()
1118 INIT_LIST_HEAD(&kvm->devices); in kvm_create_vm()
1119 kvm->max_vcpus = KVM_MAX_VCPUS; in kvm_create_vm()
1127 kvm->debugfs_dentry = ERR_PTR(-ENOENT); in kvm_create_vm()
1129 snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d", in kvm_create_vm()
1132 r = -ENOMEM; in kvm_create_vm()
1133 if (init_srcu_struct(&kvm->srcu)) in kvm_create_vm()
1135 if (init_srcu_struct(&kvm->irq_srcu)) in kvm_create_vm()
1142 refcount_set(&kvm->users_count, 1); in kvm_create_vm()
1146 slots = &kvm->__memslots[i][j]; in kvm_create_vm()
1148 atomic_long_set(&slots->last_used_slot, (unsigned long)NULL); in kvm_create_vm()
1149 slots->hva_tree = RB_ROOT_CACHED; in kvm_create_vm()
1150 slots->gfn_tree = RB_ROOT; in kvm_create_vm()
1151 hash_init(slots->id_hash); in kvm_create_vm()
1152 slots->node_idx = j; in kvm_create_vm()
1155 slots->generation = i; in kvm_create_vm()
1158 rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]); in kvm_create_vm()
1161 r = -ENOMEM; in kvm_create_vm()
1163 rcu_assign_pointer(kvm->buses[i], in kvm_create_vm()
1165 if (!kvm->buses[i]) in kvm_create_vm()
1178 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); in kvm_create_vm()
1194 list_add(&kvm->vm_list, &vm_list); in kvm_create_vm()
1206 if (kvm->mmu_notifier.ops) in kvm_create_vm()
1207 mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); in kvm_create_vm()
1214 WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); in kvm_create_vm()
1219 cleanup_srcu_struct(&kvm->irq_srcu); in kvm_create_vm()
1221 cleanup_srcu_struct(&kvm->srcu); in kvm_create_vm()
1224 mmdrop(current->mm); in kvm_create_vm()
1233 * We do not need to take the kvm->lock here, because nobody else in kvm_destroy_devices()
1241 * use-after-free, even though this cannot be guaranteed. in kvm_destroy_devices()
1243 list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { in kvm_destroy_devices()
1244 list_del(&dev->vm_node); in kvm_destroy_devices()
1245 dev->ops->destroy(dev); in kvm_destroy_devices()
1252 struct mm_struct *mm = kvm->mm; in kvm_destroy_vm()
1259 list_del(&kvm->vm_list); in kvm_destroy_vm()
1265 struct kvm_io_bus *bus = kvm_get_bus(kvm, i); in kvm_destroy_vm() local
1267 if (bus) in kvm_destroy_vm()
1268 kvm_io_bus_destroy(bus); in kvm_destroy_vm()
1269 kvm->buses[i] = NULL; in kvm_destroy_vm()
1273 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); in kvm_destroy_vm()
1284 * in-progress invalidations. in kvm_destroy_vm()
1286 WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait)); in kvm_destroy_vm()
1287 if (kvm->mn_active_invalidate_count) in kvm_destroy_vm()
1288 kvm->mn_active_invalidate_count = 0; in kvm_destroy_vm()
1290 WARN_ON(kvm->mmu_invalidate_in_progress); in kvm_destroy_vm()
1297 kvm_free_memslots(kvm, &kvm->__memslots[i][0]); in kvm_destroy_vm()
1298 kvm_free_memslots(kvm, &kvm->__memslots[i][1]); in kvm_destroy_vm()
1300 cleanup_srcu_struct(&kvm->irq_srcu); in kvm_destroy_vm()
1301 cleanup_srcu_struct(&kvm->srcu); in kvm_destroy_vm()
1303 xa_destroy(&kvm->mem_attr_array); in kvm_destroy_vm()
1313 refcount_inc(&kvm->users_count); in kvm_get_kvm()
1323 return refcount_inc_not_zero(&kvm->users_count); in kvm_get_kvm_safe()
1329 if (refcount_dec_and_test(&kvm->users_count)) in kvm_put_kvm()
1336 * with a user-visible file descriptor, e.g. a vcpu or device, if installation
1343 WARN_ON(refcount_dec_and_test(&kvm->users_count)); in kvm_put_kvm_no_destroy()
1349 struct kvm *kvm = filp->private_data; in kvm_vm_release()
1365 memslot->dirty_bitmap = __vcalloc(2, dirty_bytes, GFP_KERNEL_ACCOUNT); in kvm_alloc_dirty_bitmap()
1366 if (!memslot->dirty_bitmap) in kvm_alloc_dirty_bitmap()
1367 return -ENOMEM; in kvm_alloc_dirty_bitmap()
1375 int node_idx_inactive = active->node_idx ^ 1; in kvm_get_inactive_memslots()
1377 return &kvm->__memslots[as_id][node_idx_inactive]; in kvm_get_inactive_memslots()
1382 * This also serves as a sanity check that at least one of the pointers is non-NULL,
1392 return b->as_id; in kvm_memslots_get_as_id()
1394 return a->as_id; in kvm_memslots_get_as_id()
1396 WARN_ON_ONCE(a->as_id != b->as_id); in kvm_memslots_get_as_id()
1397 return a->as_id; in kvm_memslots_get_as_id()
1403 struct rb_root *gfn_tree = &slots->gfn_tree; in kvm_insert_gfn_node()
1405 int idx = slots->node_idx; in kvm_insert_gfn_node()
1408 for (node = &gfn_tree->rb_node; *node; ) { in kvm_insert_gfn_node()
1413 if (slot->base_gfn < tmp->base_gfn) in kvm_insert_gfn_node()
1414 node = &(*node)->rb_left; in kvm_insert_gfn_node()
1415 else if (slot->base_gfn > tmp->base_gfn) in kvm_insert_gfn_node()
1416 node = &(*node)->rb_right; in kvm_insert_gfn_node()
1421 rb_link_node(&slot->gfn_node[idx], parent, node); in kvm_insert_gfn_node()
1422 rb_insert_color(&slot->gfn_node[idx], gfn_tree); in kvm_insert_gfn_node()
1428 rb_erase(&slot->gfn_node[slots->node_idx], &slots->gfn_tree); in kvm_erase_gfn_node()
1435 int idx = slots->node_idx; in kvm_replace_gfn_node()
1437 WARN_ON_ONCE(old->base_gfn != new->base_gfn); in kvm_replace_gfn_node()
1439 rb_replace_node(&old->gfn_node[idx], &new->gfn_node[idx], in kvm_replace_gfn_node()
1440 &slots->gfn_tree); in kvm_replace_gfn_node()
1449 * If @new is non-NULL its hva_node[slots_idx] range has to be set
1458 int idx = slots->node_idx; in kvm_replace_memslot()
1461 hash_del(&old->id_node[idx]); in kvm_replace_memslot()
1462 interval_tree_remove(&old->hva_node[idx], &slots->hva_tree); in kvm_replace_memslot()
1464 if ((long)old == atomic_long_read(&slots->last_used_slot)) in kvm_replace_memslot()
1465 atomic_long_set(&slots->last_used_slot, (long)new); in kvm_replace_memslot()
1474 * Initialize @new's hva range. Do this even when replacing an @old in kvm_replace_memslot()
1477 new->hva_node[idx].start = new->userspace_addr; in kvm_replace_memslot()
1478 new->hva_node[idx].last = new->userspace_addr + in kvm_replace_memslot()
1479 (new->npages << PAGE_SHIFT) - 1; in kvm_replace_memslot()
1486 hash_add(slots->id_hash, &new->id_node[idx], new->id); in kvm_replace_memslot()
1487 interval_tree_insert(&new->hva_node[idx], &slots->hva_tree); in kvm_replace_memslot()
1496 if (old && old->base_gfn == new->base_gfn) { in kvm_replace_memslot()
1522 if (mem->flags & KVM_MEM_GUEST_MEMFD) in check_memory_region_flags()
1526 * GUEST_MEMFD is incompatible with read-only memslots, as writes to in check_memory_region_flags()
1527 * read-only memslots have emulated MMIO, not page fault, semantics, in check_memory_region_flags()
1531 !(mem->flags & KVM_MEM_GUEST_MEMFD)) in check_memory_region_flags()
1534 if (mem->flags & ~valid_flags) in check_memory_region_flags()
1535 return -EINVAL; in check_memory_region_flags()
1545 u64 gen = __kvm_memslots(kvm, as_id)->generation; in kvm_swap_active_memslots()
1548 slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; in kvm_swap_active_memslots()
1555 spin_lock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1556 prepare_to_rcuwait(&kvm->mn_memslots_update_rcuwait); in kvm_swap_active_memslots()
1557 while (kvm->mn_active_invalidate_count) { in kvm_swap_active_memslots()
1559 spin_unlock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1561 spin_lock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1563 finish_rcuwait(&kvm->mn_memslots_update_rcuwait); in kvm_swap_active_memslots()
1564 rcu_assign_pointer(kvm->memslots[as_id], slots); in kvm_swap_active_memslots()
1565 spin_unlock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1572 mutex_unlock(&kvm->slots_arch_lock); in kvm_swap_active_memslots()
1574 synchronize_srcu_expedited(&kvm->srcu); in kvm_swap_active_memslots()
1578 * update in-progress flag and incrementing the generation based on in kvm_swap_active_memslots()
1582 gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; in kvm_swap_active_memslots()
1595 slots->generation = gen; in kvm_swap_active_memslots()
1613 if (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) in kvm_prepare_memory_region()
1614 new->dirty_bitmap = NULL; in kvm_prepare_memory_region()
1615 else if (old && old->dirty_bitmap) in kvm_prepare_memory_region()
1616 new->dirty_bitmap = old->dirty_bitmap; in kvm_prepare_memory_region()
1623 bitmap_set(new->dirty_bitmap, 0, new->npages); in kvm_prepare_memory_region()
1630 if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap)) in kvm_prepare_memory_region()
1641 int old_flags = old ? old->flags : 0; in kvm_commit_memory_region()
1642 int new_flags = new ? new->flags : 0; in kvm_commit_memory_region()
1648 kvm->nr_memslot_pages -= old->npages; in kvm_commit_memory_region()
1650 kvm->nr_memslot_pages += new->npages; in kvm_commit_memory_region()
1653 int change = (new_flags & KVM_MEM_LOG_DIRTY_PAGES) ? 1 : -1; in kvm_commit_memory_region()
1654 atomic_set(&kvm->nr_memslots_dirty_logging, in kvm_commit_memory_region()
1655 atomic_read(&kvm->nr_memslots_dirty_logging) + change); in kvm_commit_memory_region()
1674 if (old->dirty_bitmap && !new->dirty_bitmap) in kvm_commit_memory_region()
1713 dest->base_gfn = src->base_gfn; in kvm_copy_memslot()
1714 dest->npages = src->npages; in kvm_copy_memslot()
1715 dest->dirty_bitmap = src->dirty_bitmap; in kvm_copy_memslot()
1716 dest->arch = src->arch; in kvm_copy_memslot()
1717 dest->userspace_addr = src->userspace_addr; in kvm_copy_memslot()
1718 dest->flags = src->flags; in kvm_copy_memslot()
1719 dest->id = src->id; in kvm_copy_memslot()
1720 dest->as_id = src->as_id; in kvm_copy_memslot()
1733 invalid_slot->flags |= KVM_MEMSLOT_INVALID; in kvm_invalidate_memslot()
1741 kvm_swap_active_memslots(kvm, old->as_id); in kvm_invalidate_memslot()
1745 * memslot will be created. Validation of sp->gfn happens in: in kvm_invalidate_memslot()
1746 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) in kvm_invalidate_memslot()
1747 * - kvm_is_visible_gfn (mmu_check_root) in kvm_invalidate_memslot()
1753 mutex_lock(&kvm->slots_arch_lock); in kvm_invalidate_memslot()
1756 * Copy the arch-specific field of the newly-installed slot back to the in kvm_invalidate_memslot()
1758 * slots_arch_lock in kvm_swap_active_memslots() and re-acquiring the lock in kvm_invalidate_memslot()
1762 old->arch = invalid_slot->arch; in kvm_invalidate_memslot()
1833 mutex_lock(&kvm->slots_arch_lock); in kvm_set_memslot()
1839 * for the memslot when it is deleted/moved. Without pre-invalidation in kvm_set_memslot()
1842 * guest could access a non-existent memslot. in kvm_set_memslot()
1851 mutex_unlock(&kvm->slots_arch_lock); in kvm_set_memslot()
1852 return -ENOMEM; in kvm_set_memslot()
1869 mutex_unlock(&kvm->slots_arch_lock); in kvm_set_memslot()
1897 * No need to refresh new->arch, changes after dropping slots_arch_lock in kvm_set_memslot()
1899 * responsible for knowing that new->arch may be stale. in kvm_set_memslot()
1912 if (iter.slot->id != id) in kvm_check_memslot_overlap()
1930 lockdep_assert_held(&kvm->slots_lock); in kvm_set_memory_region()
1936 as_id = mem->slot >> 16; in kvm_set_memory_region()
1937 id = (u16)mem->slot; in kvm_set_memory_region()
1940 if ((mem->memory_size & (PAGE_SIZE - 1)) || in kvm_set_memory_region()
1941 (mem->memory_size != (unsigned long)mem->memory_size)) in kvm_set_memory_region()
1942 return -EINVAL; in kvm_set_memory_region()
1943 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) in kvm_set_memory_region()
1944 return -EINVAL; in kvm_set_memory_region()
1946 if ((mem->userspace_addr & (PAGE_SIZE - 1)) || in kvm_set_memory_region()
1947 (mem->userspace_addr != untagged_addr(mem->userspace_addr)) || in kvm_set_memory_region()
1948 !access_ok((void __user *)(unsigned long)mem->userspace_addr, in kvm_set_memory_region()
1949 mem->memory_size)) in kvm_set_memory_region()
1950 return -EINVAL; in kvm_set_memory_region()
1951 if (mem->flags & KVM_MEM_GUEST_MEMFD && in kvm_set_memory_region()
1952 (mem->guest_memfd_offset & (PAGE_SIZE - 1) || in kvm_set_memory_region()
1953 mem->guest_memfd_offset + mem->memory_size < mem->guest_memfd_offset)) in kvm_set_memory_region()
1954 return -EINVAL; in kvm_set_memory_region()
1956 return -EINVAL; in kvm_set_memory_region()
1957 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) in kvm_set_memory_region()
1958 return -EINVAL; in kvm_set_memory_region()
1961 * The size of userspace-defined memory regions is restricted in order in kvm_set_memory_region()
1967 (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) in kvm_set_memory_region()
1968 return -EINVAL; in kvm_set_memory_region()
1978 if (!mem->memory_size) { in kvm_set_memory_region()
1979 if (!old || !old->npages) in kvm_set_memory_region()
1980 return -EINVAL; in kvm_set_memory_region()
1982 if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages)) in kvm_set_memory_region()
1983 return -EIO; in kvm_set_memory_region()
1988 base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT); in kvm_set_memory_region()
1989 npages = (mem->memory_size >> PAGE_SHIFT); in kvm_set_memory_region()
1991 if (!old || !old->npages) { in kvm_set_memory_region()
1998 if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages) in kvm_set_memory_region()
1999 return -EINVAL; in kvm_set_memory_region()
2002 if (mem->flags & KVM_MEM_GUEST_MEMFD) in kvm_set_memory_region()
2003 return -EINVAL; in kvm_set_memory_region()
2004 if ((mem->userspace_addr != old->userspace_addr) || in kvm_set_memory_region()
2005 (npages != old->npages) || in kvm_set_memory_region()
2006 ((mem->flags ^ old->flags) & KVM_MEM_READONLY)) in kvm_set_memory_region()
2007 return -EINVAL; in kvm_set_memory_region()
2009 if (base_gfn != old->base_gfn) in kvm_set_memory_region()
2011 else if (mem->flags != old->flags) in kvm_set_memory_region()
2019 return -EEXIST; in kvm_set_memory_region()
2024 return -ENOMEM; in kvm_set_memory_region()
2026 new->as_id = as_id; in kvm_set_memory_region()
2027 new->id = id; in kvm_set_memory_region()
2028 new->base_gfn = base_gfn; in kvm_set_memory_region()
2029 new->npages = npages; in kvm_set_memory_region()
2030 new->flags = mem->flags; in kvm_set_memory_region()
2031 new->userspace_addr = mem->userspace_addr; in kvm_set_memory_region()
2032 if (mem->flags & KVM_MEM_GUEST_MEMFD) { in kvm_set_memory_region()
2033 r = kvm_gmem_bind(kvm, new, mem->guest_memfd, mem->guest_memfd_offset); in kvm_set_memory_region()
2045 if (mem->flags & KVM_MEM_GUEST_MEMFD) in kvm_set_memory_region()
2055 if (WARN_ON_ONCE(mem->slot < KVM_USER_MEM_SLOTS)) in kvm_set_internal_memslot()
2056 return -EINVAL; in kvm_set_internal_memslot()
2058 if (WARN_ON_ONCE(mem->flags)) in kvm_set_internal_memslot()
2059 return -EINVAL; in kvm_set_internal_memslot()
2068 if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) in kvm_vm_ioctl_set_memory_region()
2069 return -EINVAL; in kvm_vm_ioctl_set_memory_region()
2071 guard(mutex)(&kvm->slots_lock); in kvm_vm_ioctl_set_memory_region()
2077 * kvm_get_dirty_log - get a snapshot of dirty pages
2093 return -ENXIO; in kvm_get_dirty_log()
2098 as_id = log->slot >> 16; in kvm_get_dirty_log()
2099 id = (u16)log->slot; in kvm_get_dirty_log()
2101 return -EINVAL; in kvm_get_dirty_log()
2105 if (!(*memslot) || !(*memslot)->dirty_bitmap) in kvm_get_dirty_log()
2106 return -ENOENT; in kvm_get_dirty_log()
2113 any = (*memslot)->dirty_bitmap[i]; in kvm_get_dirty_log()
2115 if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n)) in kvm_get_dirty_log()
2116 return -EFAULT; in kvm_get_dirty_log()
2126 * kvm_get_dirty_log_protect - get a snapshot of dirty pages
2158 return -ENXIO; in kvm_get_dirty_log_protect()
2160 as_id = log->slot >> 16; in kvm_get_dirty_log_protect()
2161 id = (u16)log->slot; in kvm_get_dirty_log_protect()
2163 return -EINVAL; in kvm_get_dirty_log_protect()
2167 if (!memslot || !memslot->dirty_bitmap) in kvm_get_dirty_log_protect()
2168 return -ENOENT; in kvm_get_dirty_log_protect()
2170 dirty_bitmap = memslot->dirty_bitmap; in kvm_get_dirty_log_protect()
2176 if (kvm->manual_dirty_log_protect) { in kvm_get_dirty_log_protect()
2212 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) in kvm_get_dirty_log_protect()
2213 return -EFAULT; in kvm_get_dirty_log_protect()
2219 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
2223 * Steps 1-4 below provide a general overview of dirty page logging. See
2226 * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
2242 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_get_dirty_log()
2246 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_get_dirty_log()
2251 * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
2270 return -ENXIO; in kvm_clear_dirty_log_protect()
2272 as_id = log->slot >> 16; in kvm_clear_dirty_log_protect()
2273 id = (u16)log->slot; in kvm_clear_dirty_log_protect()
2275 return -EINVAL; in kvm_clear_dirty_log_protect()
2277 if (log->first_page & 63) in kvm_clear_dirty_log_protect()
2278 return -EINVAL; in kvm_clear_dirty_log_protect()
2282 if (!memslot || !memslot->dirty_bitmap) in kvm_clear_dirty_log_protect()
2283 return -ENOENT; in kvm_clear_dirty_log_protect()
2285 dirty_bitmap = memslot->dirty_bitmap; in kvm_clear_dirty_log_protect()
2287 n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; in kvm_clear_dirty_log_protect()
2289 if (log->first_page > memslot->npages || in kvm_clear_dirty_log_protect()
2290 log->num_pages > memslot->npages - log->first_page || in kvm_clear_dirty_log_protect()
2291 (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) in kvm_clear_dirty_log_protect()
2292 return -EINVAL; in kvm_clear_dirty_log_protect()
2298 if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) in kvm_clear_dirty_log_protect()
2299 return -EFAULT; in kvm_clear_dirty_log_protect()
2302 for (offset = log->first_page, i = offset / BITS_PER_LONG, in kvm_clear_dirty_log_protect()
2303 n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--; in kvm_clear_dirty_log_protect()
2316 * a problem if userspace sets them in log->dirty_bitmap. in kvm_clear_dirty_log_protect()
2337 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_clear_dirty_log()
2341 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_clear_dirty_log()
2356 * Returns true if _all_ gfns in the range [@start, @end) have attributes
2362 XA_STATE(xas, &kvm->mem_attr_array, start); in kvm_range_has_memory_attributes()
2375 return !xas_find(&xas, end - 1); in kvm_range_has_memory_attributes()
2391 struct kvm_mmu_notifier_range *range) in kvm_handle_gfn_range() argument
2401 gfn_range.arg = range->arg; in kvm_handle_gfn_range()
2402 gfn_range.may_block = range->may_block; in kvm_handle_gfn_range()
2407 * range already has the desired private vs. shared state (it's unclear in kvm_handle_gfn_range()
2415 kvm_for_each_memslot_in_gfn_range(&iter, slots, range->start, range->end) { in kvm_handle_gfn_range()
2419 gfn_range.start = max(range->start, slot->base_gfn); in kvm_handle_gfn_range()
2420 gfn_range.end = min(range->end, slot->base_gfn + slot->npages); in kvm_handle_gfn_range()
2427 if (!IS_KVM_NULL_FN(range->on_lock)) in kvm_handle_gfn_range()
2428 range->on_lock(kvm); in kvm_handle_gfn_range()
2431 ret |= range->handler(kvm, &gfn_range); in kvm_handle_gfn_range()
2435 if (range->flush_on_ret && ret) in kvm_handle_gfn_range()
2443 struct kvm_gfn_range *range) in kvm_pre_set_memory_attributes() argument
2446 * Unconditionally add the range to the invalidation set, regardless of in kvm_pre_set_memory_attributes()
2450 * adding the range allows KVM to require that MMU invalidations add at in kvm_pre_set_memory_attributes()
2451 * least one range between begin() and end(), e.g. allows KVM to detect in kvm_pre_set_memory_attributes()
2456 kvm_mmu_invalidate_range_add(kvm, range->start, range->end); in kvm_pre_set_memory_attributes()
2458 return kvm_arch_pre_set_memory_attributes(kvm, range); in kvm_pre_set_memory_attributes()
2461 /* Set @attributes for the gfn range [@start, @end). */
2488 mutex_lock(&kvm->slots_lock); in kvm_vm_set_mem_attributes()
2490 /* Nothing to do if the entire range already has the desired attributes. */ in kvm_vm_set_mem_attributes()
2499 r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT); in kvm_vm_set_mem_attributes()
2507 r = xa_err(xa_store(&kvm->mem_attr_array, i, entry, in kvm_vm_set_mem_attributes()
2515 mutex_unlock(&kvm->slots_lock); in kvm_vm_set_mem_attributes()
2525 if (attrs->flags) in kvm_vm_ioctl_set_mem_attributes()
2526 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2527 if (attrs->attributes & ~kvm_supported_mem_attributes(kvm)) in kvm_vm_ioctl_set_mem_attributes()
2528 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2529 if (attrs->size == 0 || attrs->address + attrs->size < attrs->address) in kvm_vm_ioctl_set_mem_attributes()
2530 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2531 if (!PAGE_ALIGNED(attrs->address) || !PAGE_ALIGNED(attrs->size)) in kvm_vm_ioctl_set_mem_attributes()
2532 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2534 start = attrs->address >> PAGE_SHIFT; in kvm_vm_ioctl_set_mem_attributes()
2535 end = (attrs->address + attrs->size) >> PAGE_SHIFT; in kvm_vm_ioctl_set_mem_attributes()
2539 * KVM. For simplicity, supports generic attributes only on 64-bit in kvm_vm_ioctl_set_mem_attributes()
2542 BUILD_BUG_ON(sizeof(attrs->attributes) != sizeof(unsigned long)); in kvm_vm_ioctl_set_mem_attributes()
2544 return kvm_vm_set_mem_attributes(kvm, start, end, attrs->attributes); in kvm_vm_ioctl_set_mem_attributes()
2557 u64 gen = slots->generation; in kvm_vcpu_gfn_to_memslot()
2564 if (unlikely(gen != vcpu->last_used_slot_gen)) { in kvm_vcpu_gfn_to_memslot()
2565 vcpu->last_used_slot = NULL; in kvm_vcpu_gfn_to_memslot()
2566 vcpu->last_used_slot_gen = gen; in kvm_vcpu_gfn_to_memslot()
2569 slot = try_get_memslot(vcpu->last_used_slot, gfn); in kvm_vcpu_gfn_to_memslot()
2576 * thrashing the VM-wide last_used_slot in kvm_memslots. in kvm_vcpu_gfn_to_memslot()
2580 vcpu->last_used_slot = slot; in kvm_vcpu_gfn_to_memslot()
2614 mmap_read_lock(current->mm); in kvm_host_page_size()
2615 vma = find_vma(current->mm, addr); in kvm_host_page_size()
2622 mmap_read_unlock(current->mm); in kvm_host_page_size()
2629 return slot->flags & KVM_MEM_READONLY; in memslot_is_readonly()
2635 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) in __gfn_to_hva_many()
2642 *nr_pages = slot->npages - (gfn - slot->base_gfn); in __gfn_to_hva_many()
2708 * Per page-flags.h, pages tagged PG_reserved "should in general not be in kvm_is_ad_tracked_page()
2753 if (kfp->map_writable) in kvm_resolve_pfn()
2754 *kfp->map_writable = writable; in kvm_resolve_pfn()
2757 pfn = map->pfn; in kvm_resolve_pfn()
2761 *kfp->refcounted_page = page; in kvm_resolve_pfn()
2776 * Try the fast-only path when the caller wants to pin/get the page for in hva_to_pfn_fast()
2779 * breaks Copy-on-Write (CoW), e.g. so that KVM doesn't end up pointing in hva_to_pfn_fast()
2780 * at the old, read-only page while mm/ points at a new, writable page. in hva_to_pfn_fast()
2782 if (!((kfp->flags & FOLL_WRITE) || kfp->map_writable)) in hva_to_pfn_fast()
2785 if (kfp->pin) in hva_to_pfn_fast()
2786 r = pin_user_pages_fast(kfp->hva, 1, FOLL_WRITE, &page) == 1; in hva_to_pfn_fast()
2788 r = get_user_page_fast_only(kfp->hva, FOLL_WRITE, &page); in hva_to_pfn_fast()
2800 * 1 indicates success, -errno is returned if error is detected.
2815 unsigned int flags = FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT | kfp->flags; in hva_to_pfn_slow()
2819 if (kfp->pin) in hva_to_pfn_slow()
2820 npages = pin_user_pages_unlocked(kfp->hva, 1, &page, flags); in hva_to_pfn_slow()
2822 npages = get_user_pages_unlocked(kfp->hva, 1, &page, flags); in hva_to_pfn_slow()
2831 if (WARN_ON_ONCE(kfp->map_writable && kfp->pin)) in hva_to_pfn_slow()
2835 if (!(flags & FOLL_WRITE) && kfp->map_writable && in hva_to_pfn_slow()
2836 get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) { in hva_to_pfn_slow()
2849 if (unlikely(!(vma->vm_flags & VM_READ))) in vma_is_valid()
2852 if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) in vma_is_valid()
2861 struct follow_pfnmap_args args = { .vma = vma, .address = kfp->hva }; in hva_to_pfn_remapped()
2862 bool write_fault = kfp->flags & FOLL_WRITE; in hva_to_pfn_remapped()
2870 if (kfp->pin && !allow_unsafe_mappings) in hva_to_pfn_remapped()
2871 return -EINVAL; in hva_to_pfn_remapped()
2880 r = fixup_user_fault(current->mm, kfp->hva, in hva_to_pfn_remapped()
2884 return -EAGAIN; in hva_to_pfn_remapped()
2912 if (WARN_ON_ONCE(!kfp->refcounted_page)) in hva_to_pfn()
2921 if (npages == -EINTR || npages == -EAGAIN) in hva_to_pfn()
2923 if (npages == -EHWPOISON) in hva_to_pfn()
2926 mmap_read_lock(current->mm); in hva_to_pfn()
2928 vma = vma_lookup(current->mm, kfp->hva); in hva_to_pfn()
2932 else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { in hva_to_pfn()
2934 if (r == -EAGAIN) in hva_to_pfn()
2939 if ((kfp->flags & FOLL_NOWAIT) && in hva_to_pfn()
2940 vma_is_valid(vma, kfp->flags & FOLL_WRITE)) in hva_to_pfn()
2945 mmap_read_unlock(current->mm); in hva_to_pfn()
2951 kfp->hva = __gfn_to_hva_many(kfp->slot, kfp->gfn, NULL, in kvm_follow_pfn()
2952 kfp->flags & FOLL_WRITE); in kvm_follow_pfn()
2954 if (kfp->hva == KVM_HVA_ERR_RO_BAD) in kvm_follow_pfn()
2957 if (kvm_is_error_hva(kfp->hva)) in kvm_follow_pfn()
2960 if (memslot_is_readonly(kfp->slot) && kfp->map_writable) { in kvm_follow_pfn()
2961 *kfp->map_writable = false; in kvm_follow_pfn()
2962 kfp->map_writable = NULL; in kvm_follow_pfn()
2998 return -1; in kvm_prefetch_pages()
3033 .slot = gfn_to_memslot(vcpu->kvm, gfn), in __kvm_vcpu_map()
3036 .refcounted_page = &map->pinned_page, in __kvm_vcpu_map()
3040 map->pinned_page = NULL; in __kvm_vcpu_map()
3041 map->page = NULL; in __kvm_vcpu_map()
3042 map->hva = NULL; in __kvm_vcpu_map()
3043 map->gfn = gfn; in __kvm_vcpu_map()
3044 map->writable = writable; in __kvm_vcpu_map()
3046 map->pfn = kvm_follow_pfn(&kfp); in __kvm_vcpu_map()
3047 if (is_error_noslot_pfn(map->pfn)) in __kvm_vcpu_map()
3048 return -EINVAL; in __kvm_vcpu_map()
3050 if (pfn_valid(map->pfn)) { in __kvm_vcpu_map()
3051 map->page = pfn_to_page(map->pfn); in __kvm_vcpu_map()
3052 map->hva = kmap(map->page); in __kvm_vcpu_map()
3055 map->hva = memremap(pfn_to_hpa(map->pfn), PAGE_SIZE, MEMREMAP_WB); in __kvm_vcpu_map()
3059 return map->hva ? 0 : -EFAULT; in __kvm_vcpu_map()
3065 if (!map->hva) in kvm_vcpu_unmap()
3068 if (map->page) in kvm_vcpu_unmap()
3069 kunmap(map->page); in kvm_vcpu_unmap()
3072 memunmap(map->hva); in kvm_vcpu_unmap()
3075 if (map->writable) in kvm_vcpu_unmap()
3076 kvm_vcpu_mark_page_dirty(vcpu, map->gfn); in kvm_vcpu_unmap()
3078 if (map->pinned_page) { in kvm_vcpu_unmap()
3079 if (map->writable) in kvm_vcpu_unmap()
3080 kvm_set_page_dirty(map->pinned_page); in kvm_vcpu_unmap()
3081 kvm_set_page_accessed(map->pinned_page); in kvm_vcpu_unmap()
3082 unpin_user_page(map->pinned_page); in kvm_vcpu_unmap()
3085 map->hva = NULL; in kvm_vcpu_unmap()
3086 map->page = NULL; in kvm_vcpu_unmap()
3087 map->pinned_page = NULL; in kvm_vcpu_unmap()
3093 if (len > PAGE_SIZE - offset) in next_segment()
3094 return PAGE_SIZE - offset; in next_segment()
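
Those two lines are the heart of next_segment(), the helper the guest read/write/clear loops below use to split an access at page boundaries. Filled out, it plausibly reads:

    /* Bytes of @len that fit in the current page, starting at @offset. */
    static int next_segment(unsigned long len, int offset)
    {
            if (len > PAGE_SIZE - offset)
                    return PAGE_SIZE - offset;
            else
                    return len;
    }

Each loop iteration then advances by that segment and subtracts it from the remaining length, which is the "len -= seg" visible repeatedly below.
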
3107 return -EFAULT; in __kvm_read_guest_page()
3111 return -EFAULT; in __kvm_read_guest_page()
3114 return -EFAULT; in __kvm_read_guest_page()
3148 len -= seg; in kvm_read_guest()
3168 len -= seg; in kvm_vcpu_read_guest()
3183 return -EFAULT; in __kvm_read_guest_atomic()
3187 return -EFAULT; in __kvm_read_guest_atomic()
3192 return -EFAULT; in __kvm_read_guest_atomic()
3216 return -EFAULT; in __kvm_write_guest_page()
3220 return -EFAULT; in __kvm_write_guest_page()
3223 return -EFAULT; in __kvm_write_guest_page()
3242 return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len); in kvm_vcpu_write_guest_page()
3259 len -= seg; in kvm_write_guest()
3280 len -= seg; in kvm_vcpu_write_guest()
3294 gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; in __kvm_gfn_to_hva_cache_init()
3295 gfn_t nr_pages_needed = end_gfn - start_gfn + 1; in __kvm_gfn_to_hva_cache_init()
3298 /* Update ghc->generation before performing any error checks. */ in __kvm_gfn_to_hva_cache_init()
3299 ghc->generation = slots->generation; in __kvm_gfn_to_hva_cache_init()
3302 ghc->hva = KVM_HVA_ERR_BAD; in __kvm_gfn_to_hva_cache_init()
3303 return -EINVAL; in __kvm_gfn_to_hva_cache_init()
3311 ghc->memslot = __gfn_to_memslot(slots, start_gfn); in __kvm_gfn_to_hva_cache_init()
3312 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, in __kvm_gfn_to_hva_cache_init()
3314 if (kvm_is_error_hva(ghc->hva)) in __kvm_gfn_to_hva_cache_init()
3315 return -EFAULT; in __kvm_gfn_to_hva_cache_init()
3320 ghc->hva += offset; in __kvm_gfn_to_hva_cache_init()
3322 ghc->memslot = NULL; in __kvm_gfn_to_hva_cache_init()
3324 ghc->gpa = gpa; in __kvm_gfn_to_hva_cache_init()
3325 ghc->len = len; in __kvm_gfn_to_hva_cache_init()
3343 gpa_t gpa = ghc->gpa + offset; in kvm_write_guest_offset_cached()
3345 if (WARN_ON_ONCE(len + offset > ghc->len)) in kvm_write_guest_offset_cached()
3346 return -EINVAL; in kvm_write_guest_offset_cached()
3348 if (slots->generation != ghc->generation) { in kvm_write_guest_offset_cached()
3349 if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) in kvm_write_guest_offset_cached()
3350 return -EFAULT; in kvm_write_guest_offset_cached()
3353 if (kvm_is_error_hva(ghc->hva)) in kvm_write_guest_offset_cached()
3354 return -EFAULT; in kvm_write_guest_offset_cached()
3356 if (unlikely(!ghc->memslot)) in kvm_write_guest_offset_cached()
3359 r = __copy_to_user((void __user *)ghc->hva + offset, data, len); in kvm_write_guest_offset_cached()
3361 return -EFAULT; in kvm_write_guest_offset_cached()
3362 mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT); in kvm_write_guest_offset_cached()
3381 gpa_t gpa = ghc->gpa + offset; in kvm_read_guest_offset_cached()
3383 if (WARN_ON_ONCE(len + offset > ghc->len)) in kvm_read_guest_offset_cached()
3384 return -EINVAL; in kvm_read_guest_offset_cached()
3386 if (slots->generation != ghc->generation) { in kvm_read_guest_offset_cached()
3387 if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) in kvm_read_guest_offset_cached()
3388 return -EFAULT; in kvm_read_guest_offset_cached()
3391 if (kvm_is_error_hva(ghc->hva)) in kvm_read_guest_offset_cached()
3392 return -EFAULT; in kvm_read_guest_offset_cached()
3394 if (unlikely(!ghc->memslot)) in kvm_read_guest_offset_cached()
3397 r = __copy_from_user(data, (void __user *)ghc->hva + offset, len); in kvm_read_guest_offset_cached()
3399 return -EFAULT; in kvm_read_guest_offset_cached()
3425 len -= seg; in kvm_clear_guest()
3439 if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm)) in mark_page_dirty_in_slot()
3446 unsigned long rel_gfn = gfn - memslot->base_gfn; in mark_page_dirty_in_slot()
3447 u32 slot = (memslot->as_id << 16) | memslot->id; in mark_page_dirty_in_slot()
3449 if (kvm->dirty_ring_size && vcpu) in mark_page_dirty_in_slot()
3451 else if (memslot->dirty_bitmap) in mark_page_dirty_in_slot()
3452 set_bit_le(rel_gfn, memslot->dirty_bitmap); in mark_page_dirty_in_slot()
3471 mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn); in kvm_vcpu_mark_page_dirty()
3477 if (!vcpu->sigset_active) in kvm_sigset_activate()
3481 * This does a lockless modification of ->real_blocked, which is fine in kvm_sigset_activate()
3482 * because, only current can change ->real_blocked and all readers of in kvm_sigset_activate()
3483 * ->real_blocked don't care as long ->real_blocked is always a subset in kvm_sigset_activate()
3484 * of ->blocked. in kvm_sigset_activate()
3486 sigprocmask(SIG_SETMASK, &vcpu->sigset, &current->real_blocked); in kvm_sigset_activate()
3491 if (!vcpu->sigset_active) in kvm_sigset_deactivate()
3494 sigprocmask(SIG_SETMASK, &current->real_blocked, NULL); in kvm_sigset_deactivate()
3495 sigemptyset(&current->real_blocked); in kvm_sigset_deactivate()
3502 old = val = vcpu->halt_poll_ns; in grow_halt_poll_ns()
3512 vcpu->halt_poll_ns = val; in grow_halt_poll_ns()
3514 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); in grow_halt_poll_ns()
3521 old = val = vcpu->halt_poll_ns; in shrink_halt_poll_ns()
3532 vcpu->halt_poll_ns = val; in shrink_halt_poll_ns()
3533 trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); in shrink_halt_poll_ns()
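
Per the module-parameter comments near the top of the file ("Default doubles per-vcpu halt_poll_ns", "Default halves per-vcpu halt_poll_ns"), these two helpers scale vcpu->halt_poll_ns by tunable factors. A simplified sketch of that policy follows; the parameter names halt_poll_ns_grow, halt_poll_ns_grow_start and halt_poll_ns_shrink are the usual tunables, but the exact clamping is condensed here rather than copied verbatim.

    static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
    {
            unsigned int old, val, grow, grow_start;

            old = val = vcpu->halt_poll_ns;
            grow = READ_ONCE(halt_poll_ns_grow);             /* default: 2 */
            grow_start = READ_ONCE(halt_poll_ns_grow_start);
            if (!grow)
                    goto out;

            val *= grow;                    /* e.g. double the polling window */
            if (val < grow_start)
                    val = grow_start;       /* jump-start when polling was off */

            vcpu->halt_poll_ns = val;
    out:
            trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
    }

    static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
    {
            unsigned int old, val, shrink;

            old = val = vcpu->halt_poll_ns;
            shrink = READ_ONCE(halt_poll_ns_shrink);         /* default: 2 */
            if (!shrink)
                    val = 0;                /* shrink of 0 disables polling */
            else
                    val /= shrink;          /* e.g. halve the polling window */

            vcpu->halt_poll_ns = val;
            trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
    }
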
3538 int ret = -EINTR; in kvm_vcpu_check_block()
3539 int idx = srcu_read_lock(&vcpu->kvm->srcu); in kvm_vcpu_check_block()
3552 srcu_read_unlock(&vcpu->kvm->srcu, idx); in kvm_vcpu_check_block()
3559 * directly for other vCPU non-runnable states, e.g. x86's Wait-For-SIPI.
3566 vcpu->stat.generic.blocking = 1; in kvm_vcpu_block()
3588 vcpu->stat.generic.blocking = 0; in kvm_vcpu_block()
3596 struct kvm_vcpu_stat_generic *stats = &vcpu->stat.generic; in update_halt_poll_stats()
3599 ++vcpu->stat.generic.halt_attempted_poll; in update_halt_poll_stats()
3602 ++vcpu->stat.generic.halt_successful_poll; in update_halt_poll_stats()
3605 ++vcpu->stat.generic.halt_poll_invalid; in update_halt_poll_stats()
3607 stats->halt_poll_success_ns += poll_ns; in update_halt_poll_stats()
3608 KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_success_hist, poll_ns); in update_halt_poll_stats()
3610 stats->halt_poll_fail_ns += poll_ns; in update_halt_poll_stats()
3611 KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_fail_hist, poll_ns); in update_halt_poll_stats()
3617 struct kvm *kvm = vcpu->kvm; in kvm_vcpu_max_halt_poll_ns()
3619 if (kvm->override_halt_poll_ns) { in kvm_vcpu_max_halt_poll_ns()
3621 * Ensure kvm->max_halt_poll_ns is not read before in kvm_vcpu_max_halt_poll_ns()
3622 * kvm->override_halt_poll_ns. in kvm_vcpu_max_halt_poll_ns()
3627 return READ_ONCE(kvm->max_halt_poll_ns); in kvm_vcpu_max_halt_poll_ns()
3648 if (vcpu->halt_poll_ns > max_halt_poll_ns) in kvm_vcpu_halt()
3649 vcpu->halt_poll_ns = max_halt_poll_ns; in kvm_vcpu_halt()
3651 do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; in kvm_vcpu_halt()
3655 ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); in kvm_vcpu_halt()
3669 vcpu->stat.generic.halt_wait_ns += in kvm_vcpu_halt()
3670 ktime_to_ns(cur) - ktime_to_ns(poll_end); in kvm_vcpu_halt()
3671 KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.generic.halt_wait_hist, in kvm_vcpu_halt()
3672 ktime_to_ns(cur) - ktime_to_ns(poll_end)); in kvm_vcpu_halt()
3676 halt_ns = ktime_to_ns(cur) - ktime_to_ns(start); in kvm_vcpu_halt()
3679 * Note, halt-polling is considered successful so long as the vCPU was in kvm_vcpu_halt()
3681 * after the halt-polling loop itself, but before the full wait. in kvm_vcpu_halt()
3693 if (halt_ns <= vcpu->halt_poll_ns) in kvm_vcpu_halt()
3696 else if (vcpu->halt_poll_ns && in kvm_vcpu_halt()
3700 else if (vcpu->halt_poll_ns < max_halt_poll_ns && in kvm_vcpu_halt()
3704 vcpu->halt_poll_ns = 0; in kvm_vcpu_halt()
3715 WRITE_ONCE(vcpu->ready, true); in kvm_vcpu_wake_up()
3716 ++vcpu->stat.generic.halt_wakeup; in kvm_vcpu_wake_up()
3743 if (vcpu->mode == IN_GUEST_MODE) in kvm_vcpu_kick()
3744 WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE); in kvm_vcpu_kick()
3756 cpu = READ_ONCE(vcpu->cpu); in kvm_vcpu_kick()
3771 if (!read_trylock(&target->pid_lock)) in kvm_vcpu_yield_to()
3774 if (target->pid) in kvm_vcpu_yield_to()
3775 task = get_pid_task(target->pid, PIDTYPE_PID); in kvm_vcpu_yield_to()
3777 read_unlock(&target->pid_lock); in kvm_vcpu_yield_to()
3792 * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
3796 * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
3801 * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
3802 * to preempted lock-holder could result in wrong VCPU selection and CPU
3803 * burning. Giving priority for a potential lock-holder increases lock
3815 eligible = !vcpu->spin_loop.in_spin_loop || in kvm_vcpu_eligible_for_directed_yield()
3816 vcpu->spin_loop.dy_eligible; in kvm_vcpu_eligible_for_directed_yield()
3818 if (vcpu->spin_loop.in_spin_loop) in kvm_vcpu_eligible_for_directed_yield()
3819 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); in kvm_vcpu_eligible_for_directed_yield()
3843 if (!list_empty_careful(&vcpu->async_pf.done)) in vcpu_dy_runnable()
3855 * directly for cross-vCPU checks is functionally correct and accurate.
3870 struct kvm *kvm = me->kvm; in kvm_vcpu_on_spin()
3874 nr_vcpus = atomic_read(&kvm->online_vcpus); in kvm_vcpu_on_spin()
3894 * approximate a round-robin selection by iterating over all vCPUs, in kvm_vcpu_on_spin()
3895 * starting at the last boosted vCPU. I.e. if N=kvm->last_boosted_vcpu, in kvm_vcpu_on_spin()
3896 * iterate over vCPU[N+1]..vCPU[N-1], wrapping as needed. in kvm_vcpu_on_spin()
3902 start = READ_ONCE(kvm->last_boosted_vcpu) + 1; in kvm_vcpu_on_spin()
3905 if (idx == me->vcpu_idx) in kvm_vcpu_on_spin()
3908 vcpu = xa_load(&kvm->vcpu_array, idx); in kvm_vcpu_on_spin()
3909 if (!READ_ONCE(vcpu->ready)) in kvm_vcpu_on_spin()
3915 * Treat the target vCPU as being in-kernel if it has a pending in kvm_vcpu_on_spin()
3917 * waiting on IPI delivery, i.e. the target vCPU is in-kernel in kvm_vcpu_on_spin()
3920 if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && in kvm_vcpu_on_spin()
3930 WRITE_ONCE(kvm->last_boosted_vcpu, i); in kvm_vcpu_on_spin()
3932 } else if (yielded < 0 && !--try) { in kvm_vcpu_on_spin()
3948 kvm->dirty_ring_size / PAGE_SIZE); in kvm_page_in_dirty_ring()
3956 struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; in kvm_vcpu_fault()
3959 if (vmf->pgoff == 0) in kvm_vcpu_fault()
3960 page = virt_to_page(vcpu->run); in kvm_vcpu_fault()
3962 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) in kvm_vcpu_fault()
3963 page = virt_to_page(vcpu->arch.pio_data); in kvm_vcpu_fault()
3966 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) in kvm_vcpu_fault()
3967 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); in kvm_vcpu_fault()
3969 else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff)) in kvm_vcpu_fault()
3971 &vcpu->dirty_ring, in kvm_vcpu_fault()
3972 vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET); in kvm_vcpu_fault()
3976 vmf->page = page; in kvm_vcpu_fault()
3986 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_mmap()
3989 if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) || in kvm_vcpu_mmap()
3990 kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) && in kvm_vcpu_mmap()
3991 ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED))) in kvm_vcpu_mmap()
3992 return -EINVAL; in kvm_vcpu_mmap()
3994 vma->vm_ops = &kvm_vcpu_vm_ops; in kvm_vcpu_mmap()
4000 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_release()
4002 kvm_put_kvm(vcpu->kvm); in kvm_vcpu_release()
4021 snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); in create_vcpu_fd()
4030 read_lock(&vcpu->pid_lock); in vcpu_get_pid()
4031 *val = pid_nr(vcpu->pid); in vcpu_get_pid()
4032 read_unlock(&vcpu->pid_lock); in vcpu_get_pid()
4046 snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); in kvm_create_vcpu_debugfs()
4048 vcpu->kvm->debugfs_dentry); in kvm_create_vcpu_debugfs()
4067 * too-large values instead of silently truncating. in kvm_vm_ioctl_create_vcpu()
4075 return -EINVAL; in kvm_vm_ioctl_create_vcpu()
4077 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4078 if (kvm->created_vcpus >= kvm->max_vcpus) { in kvm_vm_ioctl_create_vcpu()
4079 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4080 return -EINVAL; in kvm_vm_ioctl_create_vcpu()
4085 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4089 kvm->created_vcpus++; in kvm_vm_ioctl_create_vcpu()
4090 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4094 r = -ENOMEM; in kvm_vm_ioctl_create_vcpu()
4101 r = -ENOMEM; in kvm_vm_ioctl_create_vcpu()
4104 vcpu->run = page_address(page); in kvm_vm_ioctl_create_vcpu()
4112 if (kvm->dirty_ring_size) { in kvm_vm_ioctl_create_vcpu()
4113 r = kvm_dirty_ring_alloc(&vcpu->dirty_ring, in kvm_vm_ioctl_create_vcpu()
4114 id, kvm->dirty_ring_size); in kvm_vm_ioctl_create_vcpu()
4119 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4122 r = -EEXIST; in kvm_vm_ioctl_create_vcpu()
4126 vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); in kvm_vm_ioctl_create_vcpu()
4127 r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT); in kvm_vm_ioctl_create_vcpu()
4128 WARN_ON_ONCE(r == -EBUSY); in kvm_vm_ioctl_create_vcpu()
4136 * into a NULL-pointer dereference because KVM thinks the _current_ in kvm_vm_ioctl_create_vcpu()
4137 * vCPU doesn't exist. As a bonus, taking vcpu->mutex ensures lockdep in kvm_vm_ioctl_create_vcpu()
4138 * knows it's taken *inside* kvm->lock. in kvm_vm_ioctl_create_vcpu()
4140 mutex_lock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4148 * pointer before kvm->online_vcpu's incremented value. in kvm_vm_ioctl_create_vcpu()
4151 atomic_inc(&kvm->online_vcpus); in kvm_vm_ioctl_create_vcpu()
4152 mutex_unlock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4154 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4160 mutex_unlock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4162 xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx); in kvm_vm_ioctl_create_vcpu()
4164 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4165 kvm_dirty_ring_free(&vcpu->dirty_ring); in kvm_vm_ioctl_create_vcpu()
4169 free_page((unsigned long)vcpu->run); in kvm_vm_ioctl_create_vcpu()
4173 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4174 kvm->created_vcpus--; in kvm_vm_ioctl_create_vcpu()
4175 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4183 vcpu->sigset_active = 1; in kvm_vcpu_ioctl_set_sigmask()
4184 vcpu->sigset = *sigset; in kvm_vcpu_ioctl_set_sigmask()
4186 vcpu->sigset_active = 0; in kvm_vcpu_ioctl_set_sigmask()
4193 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_stats_read()
4195 return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header, in kvm_vcpu_stats_read()
4196 &kvm_vcpu_stats_desc[0], &vcpu->stat, in kvm_vcpu_stats_read()
4197 sizeof(vcpu->stat), user_buffer, size, offset); in kvm_vcpu_stats_read()
4202 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_stats_release()
4204 kvm_put_kvm(vcpu->kvm); in kvm_vcpu_stats_release()
4221 snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id); in kvm_vcpu_ioctl_get_stats_fd()
4233 kvm_get_kvm(vcpu->kvm); in kvm_vcpu_ioctl_get_stats_fd()
4235 file->f_mode |= FMODE_PREAD; in kvm_vcpu_ioctl_get_stats_fd()
4243 struct kvm_pre_fault_memory *range) in kvm_vcpu_pre_fault_memory() argument
4249 if (range->flags) in kvm_vcpu_pre_fault_memory()
4250 return -EINVAL; in kvm_vcpu_pre_fault_memory()
4252 if (!PAGE_ALIGNED(range->gpa) || in kvm_vcpu_pre_fault_memory()
4253 !PAGE_ALIGNED(range->size) || in kvm_vcpu_pre_fault_memory()
4254 range->gpa + range->size <= range->gpa) in kvm_vcpu_pre_fault_memory()
4255 return -EINVAL; in kvm_vcpu_pre_fault_memory()
4258 idx = srcu_read_lock(&vcpu->kvm->srcu); in kvm_vcpu_pre_fault_memory()
4260 full_size = range->size; in kvm_vcpu_pre_fault_memory()
4263 r = -EINTR; in kvm_vcpu_pre_fault_memory()
4267 r = kvm_arch_vcpu_pre_fault_memory(vcpu, range); in kvm_vcpu_pre_fault_memory()
4268 if (WARN_ON_ONCE(r == 0 || r == -EIO)) in kvm_vcpu_pre_fault_memory()
4274 range->size -= r; in kvm_vcpu_pre_fault_memory()
4275 range->gpa += r; in kvm_vcpu_pre_fault_memory()
4277 } while (range->size); in kvm_vcpu_pre_fault_memory()
4279 srcu_read_unlock(&vcpu->kvm->srcu, idx); in kvm_vcpu_pre_fault_memory()
4283 return full_size == range->size ? r : 0; in kvm_vcpu_pre_fault_memory()
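
The pre-fault loop above consumes the range in per-iteration chunks and, on a partial failure, reports success so userspace can retry with the leftover gpa/size that is copied back to it. A small stand-alone sketch of the same pattern (the worker callback is hypothetical):

    struct byte_range {
        unsigned long start;
        unsigned long size;
    };

    /* Hypothetical worker: returns bytes processed (> 0) or a negative error. */
    typedef long (*range_worker_t)(unsigned long start, unsigned long size);

    /* Process a non-empty range in chunks, handing any leftover back to the caller. */
    static int process_range(struct byte_range *range, range_worker_t work)
    {
        unsigned long full_size = range->size;
        long r = 0;

        do {
            r = work(range->start, range->size);
            if (r <= 0)             /* error, or no forward progress */
                break;

            /* Consume this iteration's progress. */
            range->start += r;
            range->size -= r;
        } while (range->size);

        /*
         * As in the ioctl above: if any progress was made, report success and
         * let the caller retry with the (updated) leftover range.
         */
        if (range->size != full_size)
            return 0;
        return r < 0 ? (int)r : -1;
    }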
4289 struct kvm *kvm = vcpu->kvm; in kvm_wait_for_vcpu_online()
4292 * In practice, this happy path will always be taken, as a well-behaved in kvm_wait_for_vcpu_online()
4295 if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus))) in kvm_wait_for_vcpu_online()
4303 if (mutex_lock_killable(&vcpu->mutex)) in kvm_wait_for_vcpu_online()
4304 return -EINTR; in kvm_wait_for_vcpu_online()
4306 mutex_unlock(&vcpu->mutex); in kvm_wait_for_vcpu_online()
4308 if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx))) in kvm_wait_for_vcpu_online()
4309 return -EIO; in kvm_wait_for_vcpu_online()
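
kvm_wait_for_vcpu_online() leans on the fact that the creator in kvm_vm_ioctl_create_vcpu() holds vcpu->mutex until the vCPU is fully online, so a racing reader only has to take and release that mutex to wait for creation to finish. A pthread sketch of the idea, assuming (as in KVM) the object only becomes reachable after its creator already holds the mutex:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    struct object {
        pthread_mutex_t mutex;
        atomic_bool online;     /* set by the creator before it drops ->mutex */
    };

    /* Creator: finish setup and mark the object online while still holding
     * its mutex, then drop the mutex to let waiters through. */
    static void creator_publish(struct object *obj)
    {
        pthread_mutex_lock(&obj->mutex);
        /* ... expensive initialization ... */
        atomic_store(&obj->online, true);
        pthread_mutex_unlock(&obj->mutex);
    }

    /* Late reader: if the object is not yet online, acquiring and releasing
     * its mutex is enough to wait for the creator to finish. */
    static int wait_until_online(struct object *obj)
    {
        if (atomic_load(&obj->online))  /* happy path, normally always taken */
            return 0;

        pthread_mutex_lock(&obj->mutex);
        pthread_mutex_unlock(&obj->mutex);

        return atomic_load(&obj->online) ? 0 : -1;
    }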
4317 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_ioctl()
4323 if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) in kvm_vcpu_ioctl()
4324 return -EIO; in kvm_vcpu_ioctl()
4327 return -EINVAL; in kvm_vcpu_ioctl()
4343 if (r != -ENOIOCTLCMD) in kvm_vcpu_ioctl()
4346 if (mutex_lock_killable(&vcpu->mutex)) in kvm_vcpu_ioctl()
4347 return -EINTR; in kvm_vcpu_ioctl()
4351 r = -EINVAL; in kvm_vcpu_ioctl()
4356 * Note, vcpu->pid is primarily protected by vcpu->mutex. The in kvm_vcpu_ioctl()
4358 * read vcpu->pid while this vCPU is in KVM_RUN, e.g. to yield in kvm_vcpu_ioctl()
4361 oldpid = vcpu->pid; in kvm_vcpu_ioctl()
4371 write_lock(&vcpu->pid_lock); in kvm_vcpu_ioctl()
4372 vcpu->pid = newpid; in kvm_vcpu_ioctl()
4373 write_unlock(&vcpu->pid_lock); in kvm_vcpu_ioctl()
4377 vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); in kvm_vcpu_ioctl()
4379 vcpu->wants_to_run = false; in kvm_vcpu_ioctl()
4381 trace_kvm_userspace_exit(vcpu->run->exit_reason, r); in kvm_vcpu_ioctl()
4387 r = -ENOMEM; in kvm_vcpu_ioctl()
4394 r = -EFAULT; in kvm_vcpu_ioctl()
4416 r = -ENOMEM; in kvm_vcpu_ioctl()
4422 r = -EFAULT; in kvm_vcpu_ioctl()
4444 r = -EFAULT; in kvm_vcpu_ioctl()
4453 r = -EFAULT; in kvm_vcpu_ioctl()
4462 r = -EFAULT; in kvm_vcpu_ioctl()
4468 r = -EFAULT; in kvm_vcpu_ioctl()
4477 r = -EFAULT; in kvm_vcpu_ioctl()
4490 r = -EFAULT; in kvm_vcpu_ioctl()
4494 r = -EINVAL; in kvm_vcpu_ioctl()
4497 r = -EFAULT; in kvm_vcpu_ioctl()
4498 if (copy_from_user(&sigset, sigmask_arg->sigset, in kvm_vcpu_ioctl()
4508 r = -ENOMEM; in kvm_vcpu_ioctl()
4514 r = -EFAULT; in kvm_vcpu_ioctl()
4536 struct kvm_pre_fault_memory range; in kvm_vcpu_ioctl() local
4538 r = -EFAULT; in kvm_vcpu_ioctl()
4539 if (copy_from_user(&range, argp, sizeof(range))) in kvm_vcpu_ioctl()
4541 r = kvm_vcpu_pre_fault_memory(vcpu, &range); in kvm_vcpu_ioctl()
4542 /* Pass back leftover range. */ in kvm_vcpu_ioctl()
4543 if (copy_to_user(argp, &range, sizeof(range))) in kvm_vcpu_ioctl()
4544 r = -EFAULT; in kvm_vcpu_ioctl()
4552 mutex_unlock(&vcpu->mutex); in kvm_vcpu_ioctl()
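
The KVM_RUN leg above republishes vcpu->pid under write_lock(&vcpu->pid_lock), while cross-vCPU readers such as vcpu_get_pid() near the top of this excerpt take the read side. A user-space rwlock analogue of that publish/read split (illustrative; KVM uses get_pid()/put_pid() rather than malloc/free):

    #include <pthread.h>
    #include <stdlib.h>

    struct vcpu {
        pthread_rwlock_t pid_lock;
        int *pid;               /* written by the vCPU task, read by others */
    };

    /* Reader side (cf. vcpu_get_pid()): take the lock shared. */
    static int vcpu_read_pid(struct vcpu *vcpu)
    {
        int val = 0;

        pthread_rwlock_rdlock(&vcpu->pid_lock);
        if (vcpu->pid)
            val = *vcpu->pid;
        pthread_rwlock_unlock(&vcpu->pid_lock);

        return val;
    }

    /* Writer side (cf. KVM_RUN): publish the new pid, drop the old one after. */
    static void vcpu_set_pid(struct vcpu *vcpu, int *newpid)
    {
        int *oldpid;

        pthread_rwlock_wrlock(&vcpu->pid_lock);
        oldpid = vcpu->pid;
        vcpu->pid = newpid;
        pthread_rwlock_unlock(&vcpu->pid_lock);

        free(oldpid);           /* in KVM this is put_pid() outside the lock */
    }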
4562 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_compat_ioctl()
4566 if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) in kvm_vcpu_compat_ioctl()
4567 return -EIO; in kvm_vcpu_compat_ioctl()
4576 r = -EFAULT; in kvm_vcpu_compat_ioctl()
4580 r = -EINVAL; in kvm_vcpu_compat_ioctl()
4583 r = -EFAULT; in kvm_vcpu_compat_ioctl()
4585 (compat_sigset_t __user *)sigmask_arg->sigset)) in kvm_vcpu_compat_ioctl()
4603 struct kvm_device *dev = filp->private_data; in kvm_device_mmap()
4605 if (dev->ops->mmap) in kvm_device_mmap()
4606 return dev->ops->mmap(dev, vma); in kvm_device_mmap()
4608 return -ENODEV; in kvm_device_mmap()
4619 return -EPERM; in kvm_device_ioctl_attr()
4622 return -EFAULT; in kvm_device_ioctl_attr()
4630 struct kvm_device *dev = filp->private_data; in kvm_device_ioctl()
4632 if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) in kvm_device_ioctl()
4633 return -EIO; in kvm_device_ioctl()
4637 return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); in kvm_device_ioctl()
4639 return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); in kvm_device_ioctl()
4641 return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); in kvm_device_ioctl()
4643 if (dev->ops->ioctl) in kvm_device_ioctl()
4644 return dev->ops->ioctl(dev, ioctl, arg); in kvm_device_ioctl()
4646 return -ENOTTY; in kvm_device_ioctl()
4652 struct kvm_device *dev = filp->private_data; in kvm_device_release()
4653 struct kvm *kvm = dev->kvm; in kvm_device_release()
4655 if (dev->ops->release) { in kvm_device_release()
4656 mutex_lock(&kvm->lock); in kvm_device_release()
4657 list_del_rcu(&dev->vm_node); in kvm_device_release()
4659 dev->ops->release(dev); in kvm_device_release()
4660 mutex_unlock(&kvm->lock); in kvm_device_release()
4676 if (filp->f_op != &kvm_device_fops) in kvm_device_from_filp()
4679 return filp->private_data; in kvm_device_from_filp()
4692 return -ENOSPC; in kvm_register_device_ops()
4695 return -EEXIST; in kvm_register_device_ops()
4712 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; in kvm_ioctl_create_device()
4716 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) in kvm_ioctl_create_device()
4717 return -ENODEV; in kvm_ioctl_create_device()
4719 type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); in kvm_ioctl_create_device()
4722 return -ENODEV; in kvm_ioctl_create_device()
4729 return -ENOMEM; in kvm_ioctl_create_device()
4731 dev->ops = ops; in kvm_ioctl_create_device()
4732 dev->kvm = kvm; in kvm_ioctl_create_device()
4734 mutex_lock(&kvm->lock); in kvm_ioctl_create_device()
4735 ret = ops->create(dev, type); in kvm_ioctl_create_device()
4737 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4741 list_add_rcu(&dev->vm_node, &kvm->devices); in kvm_ioctl_create_device()
4742 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4744 if (ops->init) in kvm_ioctl_create_device()
4745 ops->init(dev); in kvm_ioctl_create_device()
4748 ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); in kvm_ioctl_create_device()
4751 mutex_lock(&kvm->lock); in kvm_ioctl_create_device()
4752 list_del_rcu(&dev->vm_node); in kvm_ioctl_create_device()
4754 if (ops->release) in kvm_ioctl_create_device()
4755 ops->release(dev); in kvm_ioctl_create_device()
4756 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4757 if (ops->destroy) in kvm_ioctl_create_device()
4758 ops->destroy(dev); in kvm_ioctl_create_device()
4762 cd->fd = ret; in kvm_ioctl_create_device()
4845 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4848 if (!size || (size & (size - 1))) in kvm_vm_ioctl_enable_dirty_log_ring()
4849 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4854 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4858 return -E2BIG; in kvm_vm_ioctl_enable_dirty_log_ring()
4861 if (kvm->dirty_ring_size) in kvm_vm_ioctl_enable_dirty_log_ring()
4862 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4864 mutex_lock(&kvm->lock); in kvm_vm_ioctl_enable_dirty_log_ring()
4866 if (kvm->created_vcpus) { in kvm_vm_ioctl_enable_dirty_log_ring()
4868 r = -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4870 kvm->dirty_ring_size = size; in kvm_vm_ioctl_enable_dirty_log_ring()
4874 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_enable_dirty_log_ring()
4884 if (!kvm->dirty_ring_size) in kvm_vm_ioctl_reset_dirty_pages()
4885 return -EINVAL; in kvm_vm_ioctl_reset_dirty_pages()
4887 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_reset_dirty_pages()
4890 cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring); in kvm_vm_ioctl_reset_dirty_pages()
4892 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_reset_dirty_pages()
4903 return -EINVAL; in kvm_vm_ioctl_enable_cap()
4910 lockdep_assert_held(&kvm->slots_lock); in kvm_are_all_memslots_empty()
4924 switch (cap->cap) { in kvm_vm_ioctl_enable_cap_generic()
4929 if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE) in kvm_vm_ioctl_enable_cap_generic()
4932 if (cap->flags || (cap->args[0] & ~allowed_options)) in kvm_vm_ioctl_enable_cap_generic()
4933 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
4934 kvm->manual_dirty_log_protect = cap->args[0]; in kvm_vm_ioctl_enable_cap_generic()
4939 if (cap->flags || cap->args[0] != (unsigned int)cap->args[0]) in kvm_vm_ioctl_enable_cap_generic()
4940 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
4942 kvm->max_halt_poll_ns = cap->args[0]; in kvm_vm_ioctl_enable_cap_generic()
4945 * Ensure kvm->override_halt_poll_ns does not become visible in kvm_vm_ioctl_enable_cap_generic()
4946 * before kvm->max_halt_poll_ns. in kvm_vm_ioctl_enable_cap_generic()
4951 kvm->override_halt_poll_ns = true; in kvm_vm_ioctl_enable_cap_generic()
4957 if (!kvm_vm_ioctl_check_extension_generic(kvm, cap->cap)) in kvm_vm_ioctl_enable_cap_generic()
4958 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
4960 return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); in kvm_vm_ioctl_enable_cap_generic()
4962 int r = -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
4965 !kvm->dirty_ring_size || cap->flags) in kvm_vm_ioctl_enable_cap_generic()
4968 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_enable_cap_generic()
4976 kvm->dirty_ring_with_bitmap = true; in kvm_vm_ioctl_enable_cap_generic()
4980 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_enable_cap_generic()
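
The capability handlers above follow the same validation recipe: reject any flag bits, and reject argument bits (or widths) that do not fit what will be stored. A compact stand-alone version, with placeholder constants standing in for the KVM_DIRTY_LOG_* and halt-poll definitions:

    #include <errno.h>
    #include <stdint.h>

    /* Placeholder option bits, not the real KVM_DIRTY_LOG_* values. */
    #define MANUAL_PROTECT_ENABLE   (1u << 0)
    #define INITIALLY_SET           (1u << 1)

    struct enable_cap {
        uint32_t flags;
        uint64_t args[4];
    };

    /* cf. KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: INITIALLY_SET is only allowed
     * together with MANUAL_PROTECT_ENABLE, and no flags are defined. */
    static int enable_manual_protect(const struct enable_cap *cap, uint64_t *mode)
    {
        uint64_t allowed = MANUAL_PROTECT_ENABLE;

        if (cap->args[0] & MANUAL_PROTECT_ENABLE)
            allowed |= INITIALLY_SET;

        if (cap->flags || (cap->args[0] & ~allowed))
            return -EINVAL;

        *mode = cap->args[0];
        return 0;
    }

    /* cf. KVM_CAP_HALT_POLL: the argument must fit the u32 it is stored in. */
    static int enable_halt_poll(const struct enable_cap *cap, uint32_t *max_ns)
    {
        if (cap->flags || cap->args[0] != (uint32_t)cap->args[0])
            return -EINVAL;

        *max_ns = (uint32_t)cap->args[0];
        return 0;
    }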
4992 struct kvm *kvm = file->private_data; in kvm_vm_stats_read()
4994 return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header, in kvm_vm_stats_read()
4995 &kvm_vm_stats_desc[0], &kvm->stat, in kvm_vm_stats_read()
4996 sizeof(kvm->stat), user_buffer, size, offset); in kvm_vm_stats_read()
5001 struct kvm *kvm = file->private_data; in kvm_vm_stats_release()
5023 file = anon_inode_getfile("kvm-vm-stats", in kvm_vm_ioctl_get_stats_fd()
5032 file->f_mode |= FMODE_PREAD; in kvm_vm_ioctl_get_stats_fd()
5049 struct kvm *kvm = filp->private_data; in kvm_vm_ioctl()
5053 if (kvm->mm != current->mm || kvm->vm_dead) in kvm_vm_ioctl()
5054 return -EIO; in kvm_vm_ioctl()
5062 r = -EFAULT; in kvm_vm_ioctl()
5091 r = -EFAULT; in kvm_vm_ioctl()
5095 r = -EINVAL; in kvm_vm_ioctl()
5106 r = -EFAULT; in kvm_vm_ioctl()
5116 r = -EFAULT; in kvm_vm_ioctl()
5127 r = -EFAULT; in kvm_vm_ioctl()
5136 r = -EFAULT; in kvm_vm_ioctl()
5146 r = -EFAULT; in kvm_vm_ioctl()
5155 r = -EFAULT; in kvm_vm_ioctl()
5165 r = -EFAULT; in kvm_vm_ioctl()
5177 r = -EFAULT; in kvm_vm_ioctl()
5186 r = -EFAULT; in kvm_vm_ioctl()
5202 r = -EFAULT; in kvm_vm_ioctl()
5205 r = -EINVAL; in kvm_vm_ioctl()
5214 entries = vmemdup_array_user(urouting->entries, in kvm_vm_ioctl()
5231 r = -EFAULT; in kvm_vm_ioctl()
5242 r = -EFAULT; in kvm_vm_ioctl()
5250 r = -EFAULT; in kvm_vm_ioctl()
5270 r = -EFAULT; in kvm_vm_ioctl()
5308 return -ENOTTY; in kvm_arch_vm_compat_ioctl()
5314 struct kvm *kvm = filp->private_data; in kvm_vm_compat_ioctl()
5317 if (kvm->mm != current->mm || kvm->vm_dead) in kvm_vm_compat_ioctl()
5318 return -EIO; in kvm_vm_compat_ioctl()
5321 if (r != -ENOTTY) in kvm_vm_compat_ioctl()
5332 return -EFAULT; in kvm_vm_compat_ioctl()
5349 return -EFAULT; in kvm_vm_compat_ioctl()
5374 return file && file->f_op == &kvm_vm_fops; in file_is_kvm()
5397 file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); in kvm_dev_ioctl_create_vm()
5404 * Don't call kvm_put_kvm anymore at this point; file->f_op is in kvm_dev_ioctl_create_vm()
5405 * already set, with ->release() being kvm_vm_release(). In error in kvm_dev_ioctl_create_vm()
5424 int r = -EINVAL; in kvm_dev_ioctl()
5497 return -EIO; in kvm_enable_virtualization_cpu()
5599 * If userspace initiated a forced reboot, e.g. reboot -f, then it's in kvm_enable_virtualization()
5600 * possible for an in-flight operation to enable virtualization after in kvm_enable_virtualization()
5610 r = -EBUSY; in kvm_enable_virtualization()
5621 --kvm_usage_count; in kvm_enable_virtualization()
5629 if (--kvm_usage_count) in kvm_disable_virtualization()
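
kvm_enable_virtualization()/kvm_disable_virtualization() are a usage-count pair: only the 0->1 transition actually enables hardware virtualization and only the 1->0 transition disables it, with a failed enable rolled back under the same lock. A stripped-down sketch with stub hardware hooks (the real code also registers cpuhp and syscore callbacks):

    #include <pthread.h>

    /* Stubs standing in for the arch enable/disable callbacks. */
    static int hardware_enable_all(void)   { return 0; }
    static void hardware_disable_all(void) { }

    static pthread_mutex_t usage_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long usage_count;

    /* The first user enables the hardware; later users only bump the count. */
    static int enable_virtualization(void)
    {
        int r = 0;

        pthread_mutex_lock(&usage_lock);
        if (usage_count++ == 0) {
            r = hardware_enable_all();
            if (r)
                --usage_count;  /* roll the reservation back on failure */
        }
        pthread_mutex_unlock(&usage_lock);
        return r;
    }

    /* The last user disables it again. */
    static void disable_virtualization(void)
    {
        pthread_mutex_lock(&usage_lock);
        if (--usage_count == 0)
            hardware_disable_all();
        pthread_mutex_unlock(&usage_lock);
    }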
5674 if (dev->ops->destructor) in kvm_iodevice_destructor()
5675 dev->ops->destructor(dev); in kvm_iodevice_destructor()
5678 static void kvm_io_bus_destroy(struct kvm_io_bus *bus) in kvm_io_bus_destroy() argument
5682 for (i = 0; i < bus->dev_count; i++) { in kvm_io_bus_destroy()
5683 struct kvm_io_device *pos = bus->range[i].dev; in kvm_io_bus_destroy()
5687 kfree(bus); in kvm_io_bus_destroy()
5693 gpa_t addr1 = r1->addr; in kvm_io_bus_cmp()
5694 gpa_t addr2 = r2->addr; in kvm_io_bus_cmp()
5697 return -1; in kvm_io_bus_cmp()
5699	/* If r2->len == 0, match the exact address. If r2->len != 0, accept any overlapping access. in kvm_io_bus_cmp()
5704 if (r2->len) { in kvm_io_bus_cmp()
5705 addr1 += r1->len; in kvm_io_bus_cmp()
5706 addr2 += r2->len; in kvm_io_bus_cmp()
5720 static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, in kvm_io_bus_get_first_dev() argument
5723 struct kvm_io_range *range, key; in kvm_io_bus_get_first_dev() local
5731 range = bsearch(&key, bus->range, bus->dev_count, in kvm_io_bus_get_first_dev()
5733 if (range == NULL) in kvm_io_bus_get_first_dev()
5734 return -ENOENT; in kvm_io_bus_get_first_dev()
5736 off = range - bus->range; in kvm_io_bus_get_first_dev()
5738 while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0) in kvm_io_bus_get_first_dev()
5739 off--; in kvm_io_bus_get_first_dev()
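
kvm_io_bus_get_first_dev() relies on bsearch() possibly landing anywhere inside a run of "equal" entries (ranges that all contain the access), then rewinding to the first of them so every matching device gets visited. A self-contained user-space version of the same search, with the comparator simplified from kvm_io_bus_cmp():

    #include <stddef.h>
    #include <stdlib.h>

    struct io_range {
        unsigned long addr;
        unsigned long len;
    };

    /* Order by address; the key (first argument) compares equal to a
     * registered range that contains it when that range has a non-zero
     * length, so several overlapping registrations can all match. */
    static int range_cmp(const void *a, const void *b)
    {
        const struct io_range *r1 = a, *r2 = b;
        unsigned long a1 = r1->addr, a2 = r2->addr;

        if (a1 < a2)
            return -1;
        if (r2->len) {
            a1 += r1->len;
            a2 += r2->len;
        }
        return a1 > a2 ? 1 : 0;
    }

    /* bsearch() can return any entry of the matching run; walk backwards to
     * the first one, mirroring kvm_io_bus_get_first_dev(). */
    static int get_first_match(const struct io_range *ranges, size_t count,
                               unsigned long addr, unsigned long len)
    {
        struct io_range key = { .addr = addr, .len = len };
        const struct io_range *hit;
        ptrdiff_t off;

        hit = bsearch(&key, ranges, count, sizeof(*ranges), range_cmp);
        if (!hit)
            return -1;

        off = hit - ranges;
        while (off > 0 && range_cmp(&key, &ranges[off - 1]) == 0)
            off--;

        return (int)off;
    }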
5744 static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, in __kvm_io_bus_write() argument
5745 struct kvm_io_range *range, const void *val) in __kvm_io_bus_write() argument
5749 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); in __kvm_io_bus_write()
5751 return -EOPNOTSUPP; in __kvm_io_bus_write()
5753 while (idx < bus->dev_count && in __kvm_io_bus_write()
5754 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { in __kvm_io_bus_write()
5755 if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, in __kvm_io_bus_write()
5756 range->len, val)) in __kvm_io_bus_write()
5761 return -EOPNOTSUPP; in __kvm_io_bus_write()
5764 /* kvm_io_bus_write - called under kvm->slots_lock */
5768 struct kvm_io_bus *bus; in kvm_io_bus_write() local
5769 struct kvm_io_range range; in kvm_io_bus_write() local
5772 range = (struct kvm_io_range) { in kvm_io_bus_write()
5777 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_write()
5778 if (!bus) in kvm_io_bus_write()
5779 return -ENOMEM; in kvm_io_bus_write()
5780 r = __kvm_io_bus_write(vcpu, bus, &range, val); in kvm_io_bus_write()
5785 /* kvm_io_bus_write_cookie - called under kvm->slots_lock */
5789 struct kvm_io_bus *bus; in kvm_io_bus_write_cookie() local
5790 struct kvm_io_range range; in kvm_io_bus_write_cookie() local
5792 range = (struct kvm_io_range) { in kvm_io_bus_write_cookie()
5797 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_write_cookie()
5798 if (!bus) in kvm_io_bus_write_cookie()
5799 return -ENOMEM; in kvm_io_bus_write_cookie()
5802 if ((cookie >= 0) && (cookie < bus->dev_count) && in kvm_io_bus_write_cookie()
5803 (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) in kvm_io_bus_write_cookie()
5804 if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, in kvm_io_bus_write_cookie()
5812 return __kvm_io_bus_write(vcpu, bus, &range, val); in kvm_io_bus_write_cookie()
5815 static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, in __kvm_io_bus_read() argument
5816 struct kvm_io_range *range, void *val) in __kvm_io_bus_read() argument
5820 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); in __kvm_io_bus_read()
5822 return -EOPNOTSUPP; in __kvm_io_bus_read()
5824 while (idx < bus->dev_count && in __kvm_io_bus_read()
5825 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { in __kvm_io_bus_read()
5826 if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, in __kvm_io_bus_read()
5827 range->len, val)) in __kvm_io_bus_read()
5832 return -EOPNOTSUPP; in __kvm_io_bus_read()
5835 /* kvm_io_bus_read - called under kvm->slots_lock */
5839 struct kvm_io_bus *bus; in kvm_io_bus_read() local
5840 struct kvm_io_range range; in kvm_io_bus_read() local
5843 range = (struct kvm_io_range) { in kvm_io_bus_read()
5848 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_read()
5849 if (!bus) in kvm_io_bus_read()
5850 return -ENOMEM; in kvm_io_bus_read()
5851 r = __kvm_io_bus_read(vcpu, bus, &range, val); in kvm_io_bus_read()
5859 struct kvm_io_bus *new_bus, *bus; in kvm_io_bus_register_dev() local
5860 struct kvm_io_range range; in kvm_io_bus_register_dev() local
5862 lockdep_assert_held(&kvm->slots_lock); in kvm_io_bus_register_dev()
5864 bus = kvm_get_bus(kvm, bus_idx); in kvm_io_bus_register_dev()
5865 if (!bus) in kvm_io_bus_register_dev()
5866 return -ENOMEM; in kvm_io_bus_register_dev()
5869 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) in kvm_io_bus_register_dev()
5870 return -ENOSPC; in kvm_io_bus_register_dev()
5872 new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), in kvm_io_bus_register_dev()
5875 return -ENOMEM; in kvm_io_bus_register_dev()
5877 range = (struct kvm_io_range) { in kvm_io_bus_register_dev()
5883 for (i = 0; i < bus->dev_count; i++) in kvm_io_bus_register_dev()
5884 if (kvm_io_bus_cmp(&bus->range[i], &range) > 0) in kvm_io_bus_register_dev()
5887 memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); in kvm_io_bus_register_dev()
5888 new_bus->dev_count++; in kvm_io_bus_register_dev()
5889 new_bus->range[i] = range; in kvm_io_bus_register_dev()
5890 memcpy(new_bus->range + i + 1, bus->range + i, in kvm_io_bus_register_dev()
5891 (bus->dev_count - i) * sizeof(struct kvm_io_range)); in kvm_io_bus_register_dev()
5892 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); in kvm_io_bus_register_dev()
5893 synchronize_srcu_expedited(&kvm->srcu); in kvm_io_bus_register_dev()
5894 kfree(bus); in kvm_io_bus_register_dev()
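
Registration above never edits the live bus: it builds a one-slot-larger copy, splices the new range in at its sorted position with two memcpy()s, publishes the copy, and frees the old array once readers are done (rcu_assign_pointer() plus synchronize_srcu_expedited() in KVM). A stand-alone sketch of just the copy-and-splice step, sorting by address only:

    #include <stdlib.h>
    #include <string.h>

    struct io_range {
        unsigned long addr;
        unsigned long len;
        void *dev;
    };

    struct io_bus {
        size_t dev_count;
        struct io_range range[];        /* kept sorted by addr */
    };

    /* Returns the new bus on success (the caller publishes it and frees the
     * old one), or NULL on allocation failure, leaving the old bus intact. */
    static struct io_bus *bus_insert(const struct io_bus *bus, struct io_range new_range)
    {
        struct io_bus *new_bus;
        size_t i;

        new_bus = malloc(sizeof(*bus) +
                         (bus->dev_count + 1) * sizeof(struct io_range));
        if (!new_bus)
            return NULL;

        /* Find the first existing range that sorts after the new one. */
        for (i = 0; i < bus->dev_count; i++)
            if (bus->range[i].addr > new_range.addr)
                break;

        /* Copy header + prefix, drop in the new entry, then copy the suffix. */
        memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct io_range));
        new_bus->dev_count = bus->dev_count + 1;
        new_bus->range[i] = new_range;
        memcpy(new_bus->range + i + 1, bus->range + i,
               (bus->dev_count - i) * sizeof(struct io_range));

        return new_bus;
    }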
5903 struct kvm_io_bus *new_bus, *bus; in kvm_io_bus_unregister_dev() local
5905 lockdep_assert_held(&kvm->slots_lock); in kvm_io_bus_unregister_dev()
5907 bus = kvm_get_bus(kvm, bus_idx); in kvm_io_bus_unregister_dev()
5908 if (!bus) in kvm_io_bus_unregister_dev()
5911 for (i = 0; i < bus->dev_count; i++) { in kvm_io_bus_unregister_dev()
5912 if (bus->range[i].dev == dev) { in kvm_io_bus_unregister_dev()
5917 if (i == bus->dev_count) in kvm_io_bus_unregister_dev()
5920 new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), in kvm_io_bus_unregister_dev()
5923 memcpy(new_bus, bus, struct_size(bus, range, i)); in kvm_io_bus_unregister_dev()
5924 new_bus->dev_count--; in kvm_io_bus_unregister_dev()
5925 memcpy(new_bus->range + i, bus->range + i + 1, in kvm_io_bus_unregister_dev()
5926 flex_array_size(new_bus, range, new_bus->dev_count - i)); in kvm_io_bus_unregister_dev()
5929 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); in kvm_io_bus_unregister_dev()
5930 synchronize_srcu_expedited(&kvm->srcu); in kvm_io_bus_unregister_dev()
5933	 * If NULL bus is installed, destroy the old bus, including all the attached devices. in kvm_io_bus_unregister_dev()
5937 pr_err("kvm: failed to shrink bus, removing it completely\n"); in kvm_io_bus_unregister_dev()
5938 kvm_io_bus_destroy(bus); in kvm_io_bus_unregister_dev()
5939 return -ENOMEM; in kvm_io_bus_unregister_dev()
5943 kfree(bus); in kvm_io_bus_unregister_dev()
5950 struct kvm_io_bus *bus; in kvm_io_bus_get_dev() local
5954 srcu_idx = srcu_read_lock(&kvm->srcu); in kvm_io_bus_get_dev()
5956 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); in kvm_io_bus_get_dev()
5957 if (!bus) in kvm_io_bus_get_dev()
5960 dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); in kvm_io_bus_get_dev()
5964 iodev = bus->range[dev_idx].dev; in kvm_io_bus_get_dev()
5967 srcu_read_unlock(&kvm->srcu, srcu_idx); in kvm_io_bus_get_dev()
5978 struct kvm_stat_data *stat_data = inode->i_private; in kvm_debugfs_open()
5985 if (!kvm_get_kvm_safe(stat_data->kvm)) in kvm_debugfs_open()
5986 return -ENOENT; in kvm_debugfs_open()
5989 kvm_stats_debugfs_mode(stat_data->desc) & 0222 in kvm_debugfs_open()
5992 kvm_put_kvm(stat_data->kvm); in kvm_debugfs_open()
5999 struct kvm_stat_data *stat_data = inode->i_private; in kvm_debugfs_release()
6002 kvm_put_kvm(stat_data->kvm); in kvm_debugfs_release()
6009 *val = *(u64 *)((void *)(&kvm->stat) + offset); in kvm_get_stat_per_vm()
6016 *(u64 *)((void *)(&kvm->stat) + offset) = 0; in kvm_clear_stat_per_vm()
6029 *val += *(u64 *)((void *)(&vcpu->stat) + offset); in kvm_get_stat_per_vcpu()
6040 *(u64 *)((void *)(&vcpu->stat) + offset) = 0; in kvm_clear_stat_per_vcpu()
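
The per-VM and per-vCPU stat helpers above address each counter purely by its byte offset inside the stats structure, which is what lets one pair of accessors serve every descriptor. A small user-space illustration using offsetof() and made-up field names:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct vm_stats {
        uint64_t remote_tlb_flush;
        uint64_t pages_dirtied;
    };

    /* Generic accessors keyed by byte offset into struct vm_stats. */
    static uint64_t stat_get(const struct vm_stats *stats, size_t offset)
    {
        return *(const uint64_t *)((const char *)stats + offset);
    }

    static void stat_clear(struct vm_stats *stats, size_t offset)
    {
        *(uint64_t *)((char *)stats + offset) = 0;
    }

    int main(void)
    {
        struct vm_stats stats = { .remote_tlb_flush = 42 };
        size_t off = offsetof(struct vm_stats, remote_tlb_flush);

        printf("%llu\n", (unsigned long long)stat_get(&stats, off));
        stat_clear(&stats, off);
        return 0;
    }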
6047 int r = -EFAULT; in kvm_stat_data_get()
6050 switch (stat_data->kind) { in kvm_stat_data_get()
6052 r = kvm_get_stat_per_vm(stat_data->kvm, in kvm_stat_data_get()
6053 stat_data->desc->desc.offset, val); in kvm_stat_data_get()
6056 r = kvm_get_stat_per_vcpu(stat_data->kvm, in kvm_stat_data_get()
6057 stat_data->desc->desc.offset, val); in kvm_stat_data_get()
6066 int r = -EFAULT; in kvm_stat_data_clear()
6070 return -EINVAL; in kvm_stat_data_clear()
6072 switch (stat_data->kind) { in kvm_stat_data_clear()
6074 r = kvm_clear_stat_per_vm(stat_data->kvm, in kvm_stat_data_clear()
6075 stat_data->desc->desc.offset); in kvm_stat_data_clear()
6078 r = kvm_clear_stat_per_vcpu(stat_data->kvm, in kvm_stat_data_clear()
6079 stat_data->desc->desc.offset); in kvm_stat_data_clear()
6123 return -EINVAL; in vm_stat_clear()
6159 return -EINVAL; in vcpu_stat_clear()
6187 kvm_active_vms--; in kvm_uevent_notify_change()
6202 kvm->userspace_pid = task_pid_nr(current); in kvm_uevent_notify_change()
6206 add_uevent_var(env, "PID=%d", kvm->userspace_pid); in kvm_uevent_notify_change()
6208 if (!IS_ERR(kvm->debugfs_dentry)) { in kvm_uevent_notify_change()
6212 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); in kvm_uevent_notify_change()
6219 env->envp[env->envp_idx++] = NULL; in kvm_uevent_notify_change()
6220 kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); in kvm_uevent_notify_change()
6238 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_init_debug()
6240 (void *)(long)pdesc->desc.offset, fops); in kvm_init_debug()
6249 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_init_debug()
6251 (void *)(long)pdesc->desc.offset, fops); in kvm_init_debug()
6265 WRITE_ONCE(vcpu->preempted, false); in kvm_sched_in()
6266 WRITE_ONCE(vcpu->ready, false); in kvm_sched_in()
6271 WRITE_ONCE(vcpu->scheduled_out, false); in kvm_sched_in()
6279 WRITE_ONCE(vcpu->scheduled_out, true); in kvm_sched_out()
6281 if (task_is_runnable(current) && vcpu->wants_to_run) { in kvm_sched_out()
6282 WRITE_ONCE(vcpu->preempted, true); in kvm_sched_out()
6283 WRITE_ONCE(vcpu->ready, true); in kvm_sched_out()
6290 * kvm_get_running_vcpu - get the vcpu running on the current CPU.
6292 * We can disable preemption locally around accessing the per-CPU variable,
6295 * the per-CPU value later will give us the same value as we update the
6296 * per-CPU variable in the preempt notifier handlers.
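
Going by this comment, the elided body is presumably just a preemption-safe read of the per-CPU pointer that the preempt notifiers keep up to date, roughly:

    struct kvm_vcpu *kvm_get_running_vcpu(void)
    {
        struct kvm_vcpu *vcpu;

        /* Disable preemption only around the per-CPU read; see the comment above. */
        preempt_disable();
        vcpu = __this_cpu_read(kvm_running_vcpu);
        preempt_enable();

        return vcpu;
    }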
6311 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
6375 - offsetof(struct kvm_vcpu, arch), in kvm_init()
6378 return -ENOMEM; in kvm_init()
6383 r = -ENOMEM; in kvm_init()