xref: /linux/arch/x86/kvm/mmu/page_track.c (revision 43db1111073049220381944af4a3b8a5400eda71)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Support KVM guest page tracking
4  *
5  * This feature allows us to track page access in guest. Currently, only
6  * write access is tracked.
7  *
8  * Copyright(C) 2015 Intel Corporation.
9  *
10  * Author:
11  *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
12  */
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/lockdep.h>
16 #include <linux/kvm_host.h>
17 #include <linux/rculist.h>
18 
19 #include "mmu.h"
20 #include "mmu_internal.h"
21 #include "page_track.h"
22 
/*
 * Report whether external write tracking has been switched on for @kvm.
 * Always false when CONFIG_KVM_EXTERNAL_WRITE_TRACKING is not set.
 */
static bool kvm_external_write_tracking_enabled(struct kvm *kvm)
{
#ifndef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
	return false;
#else
	/*
	 * The flag must be read before any of the related pointers, hence the
	 * acquire load.  It pairs with the smp_store_release() issued by
	 * kvm_enable_external_write_tracking().
	 */
	return smp_load_acquire(&kvm->arch.external_write_tracking_enabled);
#endif
}
35 
kvm_page_track_write_tracking_enabled(struct kvm * kvm)36 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
37 {
38 	return kvm_external_write_tracking_enabled(kvm) ||
39 	       kvm_shadow_root_allocated(kvm) || !tdp_enabled;
40 }
41 
kvm_page_track_free_memslot(struct kvm_memory_slot * slot)42 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
43 {
44 	vfree(slot->arch.gfn_write_track);
45 	slot->arch.gfn_write_track = NULL;
46 }
47 
__kvm_page_track_write_tracking_alloc(struct kvm_memory_slot * slot,unsigned long npages)48 static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
49 						 unsigned long npages)
50 {
51 	const size_t size = sizeof(*slot->arch.gfn_write_track);
52 
53 	if (!slot->arch.gfn_write_track)
54 		slot->arch.gfn_write_track = __vcalloc(npages, size,
55 						       GFP_KERNEL_ACCOUNT);
56 
57 	return slot->arch.gfn_write_track ? 0 : -ENOMEM;
58 }
59 
/*
 * Memslot creation hook: allocate write-track metadata for @slot, but only
 * when write tracking is currently enabled for @kvm.
 *
 * Returns 0 on success (including when nothing had to be allocated) or the
 * error from the allocation helper.
 */
int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (kvm_page_track_write_tracking_enabled(kvm))
		return __kvm_page_track_write_tracking_alloc(slot, npages);

	return 0;
}
69 
kvm_page_track_write_tracking_alloc(struct kvm_memory_slot * slot)70 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
71 {
72 	return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
73 }
74 
/*
 * Adjust the write-track counter of @gfn in @slot by @count.
 *
 * The adjustment is refused (with a one-time warning) if the resulting
 * counter would be negative or exceed USHRT_MAX.
 */
static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
				   short count)
{
	int idx = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
	int cur = slot->arch.gfn_write_track[idx];
	int next = cur + count;

	if (WARN_ON_ONCE(next < 0 || next > USHRT_MAX))
		return;

	slot->arch.gfn_write_track[idx] += count;
}
89 
/*
 * Start write-tracking @gfn in @slot.
 *
 * Lockdep asserts that mmu_lock is held for write and that either slots_lock
 * or the kvm->srcu read lock is held.  Flags a KVM bug and bails if write
 * tracking is not enabled for @kvm.
 */
void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
			       gfn_t gfn)
{
	bool flush;

	lockdep_assert_held_write(&kvm->mmu_lock);

	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
			    srcu_read_lock_held(&kvm->srcu));

	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
		return;

	update_gfn_write_track(slot, gfn, 1);

	/* A newly tracked page must not be covered by a large page mapping. */
	kvm_mmu_gfn_disallow_lpage(slot, gfn);

	/* Write-protect at 4K granularity; flush remote TLBs when told to. */
	flush = kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K);
	if (flush)
		kvm_flush_remote_tlbs(kvm);
}
112 
/*
 * Drop one write-tracking reference on @gfn in @slot.
 *
 * Lockdep asserts that mmu_lock is held for write and that either slots_lock
 * or the kvm->srcu read lock is held.  Flags a KVM bug and bails if write
 * tracking is not enabled for @kvm.
 */
void __kvm_write_track_remove_gfn(struct kvm *kvm,
				  struct kvm_memory_slot *slot, gfn_t gfn)
{
	lockdep_assert_held_write(&kvm->mmu_lock);

	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
			    srcu_read_lock_held(&kvm->srcu));

	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
		return;

	update_gfn_write_track(slot, gfn, -1);

	/*
	 * With this tracker gone, undo the large page restriction that was
	 * taken when the page was added.
	 */
	kvm_mmu_gfn_allow_lpage(slot, gfn);
}
132 
133 /*
134  * check if the corresponding access on the specified guest page is tracked.
135  */
kvm_gfn_is_write_tracked(struct kvm * kvm,const struct kvm_memory_slot * slot,gfn_t gfn)136 bool kvm_gfn_is_write_tracked(struct kvm *kvm,
137 			      const struct kvm_memory_slot *slot, gfn_t gfn)
138 {
139 	int index;
140 
141 	if (!slot)
142 		return false;
143 
144 	if (!kvm_page_track_write_tracking_enabled(kvm))
145 		return false;
146 
147 	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
148 	return !!READ_ONCE(slot->arch.gfn_write_track[index]);
149 }
150 
151 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
kvm_page_track_cleanup(struct kvm * kvm)152 void kvm_page_track_cleanup(struct kvm *kvm)
153 {
154 	struct kvm_page_track_notifier_head *head;
155 
156 	head = &kvm->arch.track_notifier_head;
157 	cleanup_srcu_struct(&head->track_srcu);
158 }
159 
kvm_page_track_init(struct kvm * kvm)160 int kvm_page_track_init(struct kvm *kvm)
161 {
162 	struct kvm_page_track_notifier_head *head;
163 
164 	head = &kvm->arch.track_notifier_head;
165 	INIT_HLIST_HEAD(&head->track_notifier_list);
166 	return init_srcu_struct(&head->track_srcu);
167 }
168 
kvm_enable_external_write_tracking(struct kvm * kvm)169 static int kvm_enable_external_write_tracking(struct kvm *kvm)
170 {
171 	struct kvm_memslots *slots;
172 	struct kvm_memory_slot *slot;
173 	int r = 0, i, bkt;
174 
175 	if (kvm->arch.vm_type == KVM_X86_TDX_VM)
176 		return -EOPNOTSUPP;
177 
178 	mutex_lock(&kvm->slots_arch_lock);
179 
180 	/*
181 	 * Check for *any* write tracking user (not just external users) under
182 	 * lock.  This avoids unnecessary work, e.g. if KVM itself is using
183 	 * write tracking, or if two external users raced when registering.
184 	 */
185 	if (kvm_page_track_write_tracking_enabled(kvm))
186 		goto out_success;
187 
188 	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
189 		slots = __kvm_memslots(kvm, i);
190 		kvm_for_each_memslot(slot, bkt, slots) {
191 			/*
192 			 * Intentionally do NOT free allocations on failure to
193 			 * avoid having to track which allocations were made
194 			 * now versus when the memslot was created.  The
195 			 * metadata is guaranteed to be freed when the slot is
196 			 * freed, and will be kept/used if userspace retries
197 			 * the failed ioctl() instead of killing the VM.
198 			 */
199 			r = kvm_page_track_write_tracking_alloc(slot);
200 			if (r)
201 				goto out_unlock;
202 		}
203 	}
204 
205 out_success:
206 	/*
207 	 * Ensure that external_write_tracking_enabled becomes true strictly
208 	 * after all the related pointers are set.
209 	 */
210 	smp_store_release(&kvm->arch.external_write_tracking_enabled, true);
211 out_unlock:
212 	mutex_unlock(&kvm->slots_arch_lock);
213 	return r;
214 }
215 
216 /*
217  * register the notifier so that event interception for the tracked guest
218  * pages can be received.
219  */
kvm_page_track_register_notifier(struct kvm * kvm,struct kvm_page_track_notifier_node * n)220 int kvm_page_track_register_notifier(struct kvm *kvm,
221 				     struct kvm_page_track_notifier_node *n)
222 {
223 	struct kvm_page_track_notifier_head *head;
224 	int r;
225 
226 	if (!kvm || kvm->mm != current->mm)
227 		return -ESRCH;
228 
229 	if (!kvm_external_write_tracking_enabled(kvm)) {
230 		r = kvm_enable_external_write_tracking(kvm);
231 		if (r)
232 			return r;
233 	}
234 
235 	kvm_get_kvm(kvm);
236 
237 	head = &kvm->arch.track_notifier_head;
238 
239 	write_lock(&kvm->mmu_lock);
240 	hlist_add_head_rcu(&n->node, &head->track_notifier_list);
241 	write_unlock(&kvm->mmu_lock);
242 	return 0;
243 }
244 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
245 
246 /*
247  * stop receiving the event interception. It is the opposed operation of
248  * kvm_page_track_register_notifier().
249  */
kvm_page_track_unregister_notifier(struct kvm * kvm,struct kvm_page_track_notifier_node * n)250 void kvm_page_track_unregister_notifier(struct kvm *kvm,
251 					struct kvm_page_track_notifier_node *n)
252 {
253 	struct kvm_page_track_notifier_head *head;
254 
255 	head = &kvm->arch.track_notifier_head;
256 
257 	write_lock(&kvm->mmu_lock);
258 	hlist_del_rcu(&n->node);
259 	write_unlock(&kvm->mmu_lock);
260 	synchronize_srcu(&head->track_srcu);
261 
262 	kvm_put_kvm(kvm);
263 }
264 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
265 
266 /*
267  * Notify the node that write access is intercepted and write emulation is
268  * finished at this time.
269  *
270  * The node should figure out if the written page is the one that node is
271  * interested in by itself.
272  */
__kvm_page_track_write(struct kvm * kvm,gpa_t gpa,const u8 * new,int bytes)273 void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
274 {
275 	struct kvm_page_track_notifier_head *head;
276 	struct kvm_page_track_notifier_node *n;
277 	int idx;
278 
279 	head = &kvm->arch.track_notifier_head;
280 
281 	if (hlist_empty(&head->track_notifier_list))
282 		return;
283 
284 	idx = srcu_read_lock(&head->track_srcu);
285 	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
286 				  srcu_read_lock_held(&head->track_srcu))
287 		if (n->track_write)
288 			n->track_write(gpa, new, bytes, n);
289 	srcu_read_unlock(&head->track_srcu, idx);
290 }
291 
292 /*
293  * Notify external page track nodes that a memory region is being removed from
294  * the VM, e.g. so that users can free any associated metadata.
295  */
kvm_page_track_delete_slot(struct kvm * kvm,struct kvm_memory_slot * slot)296 void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
297 {
298 	struct kvm_page_track_notifier_head *head;
299 	struct kvm_page_track_notifier_node *n;
300 	int idx;
301 
302 	head = &kvm->arch.track_notifier_head;
303 
304 	if (hlist_empty(&head->track_notifier_list))
305 		return;
306 
307 	idx = srcu_read_lock(&head->track_srcu);
308 	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
309 				  srcu_read_lock_held(&head->track_srcu))
310 		if (n->track_remove_region)
311 			n->track_remove_region(slot->base_gfn, slot->npages, n);
312 	srcu_read_unlock(&head->track_srcu, idx);
313 }
314 
315 /*
316  * add guest page to the tracking pool so that corresponding access on that
317  * page will be intercepted.
318  *
319  * @kvm: the guest instance we are interested in.
320  * @gfn: the guest page.
321  */
kvm_write_track_add_gfn(struct kvm * kvm,gfn_t gfn)322 int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
323 {
324 	struct kvm_memory_slot *slot;
325 	int idx;
326 
327 	idx = srcu_read_lock(&kvm->srcu);
328 
329 	slot = gfn_to_memslot(kvm, gfn);
330 	if (!slot) {
331 		srcu_read_unlock(&kvm->srcu, idx);
332 		return -EINVAL;
333 	}
334 
335 	write_lock(&kvm->mmu_lock);
336 	__kvm_write_track_add_gfn(kvm, slot, gfn);
337 	write_unlock(&kvm->mmu_lock);
338 
339 	srcu_read_unlock(&kvm->srcu, idx);
340 
341 	return 0;
342 }
343 EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
344 
345 /*
346  * remove the guest page from the tracking pool which stops the interception
347  * of corresponding access on that page.
348  *
349  * @kvm: the guest instance we are interested in.
350  * @gfn: the guest page.
351  */
kvm_write_track_remove_gfn(struct kvm * kvm,gfn_t gfn)352 int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
353 {
354 	struct kvm_memory_slot *slot;
355 	int idx;
356 
357 	idx = srcu_read_lock(&kvm->srcu);
358 
359 	slot = gfn_to_memslot(kvm, gfn);
360 	if (!slot) {
361 		srcu_read_unlock(&kvm->srcu, idx);
362 		return -EINVAL;
363 	}
364 
365 	write_lock(&kvm->mmu_lock);
366 	__kvm_write_track_remove_gfn(kvm, slot, gfn);
367 	write_unlock(&kvm->mmu_lock);
368 
369 	srcu_read_unlock(&kvm->srcu, idx);
370 
371 	return 0;
372 }
373 EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
374 #endif
375