1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Support KVM gust page tracking 4 * 5 * This feature allows us to track page access in guest. Currently, only 6 * write access is tracked. 7 * 8 * Copyright(C) 2015 Intel Corporation. 9 * 10 * Author: 11 * Xiao Guangrong <guangrong.xiao@linux.intel.com> 12 */ 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14 15 #include <linux/lockdep.h> 16 #include <linux/kvm_host.h> 17 #include <linux/rculist.h> 18 19 #include "mmu.h" 20 #include "mmu_internal.h" 21 #include "page_track.h" 22 23 static bool kvm_external_write_tracking_enabled(struct kvm *kvm) 24 { 25 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING 26 /* 27 * Read external_write_tracking_enabled before related pointers. Pairs 28 * with the smp_store_release in kvm_page_track_write_tracking_enable(). 29 */ 30 return smp_load_acquire(&kvm->arch.external_write_tracking_enabled); 31 #else 32 return false; 33 #endif 34 } 35 36 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) 37 { 38 return kvm_external_write_tracking_enabled(kvm) || 39 kvm_shadow_root_allocated(kvm) || !tdp_enabled; 40 } 41 42 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) 43 { 44 kvfree(slot->arch.gfn_write_track); 45 slot->arch.gfn_write_track = NULL; 46 } 47 48 static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot, 49 unsigned long npages) 50 { 51 const size_t size = sizeof(*slot->arch.gfn_write_track); 52 53 if (!slot->arch.gfn_write_track) 54 slot->arch.gfn_write_track = __vcalloc(npages, size, 55 GFP_KERNEL_ACCOUNT); 56 57 return slot->arch.gfn_write_track ? 0 : -ENOMEM; 58 } 59 60 int kvm_page_track_create_memslot(struct kvm *kvm, 61 struct kvm_memory_slot *slot, 62 unsigned long npages) 63 { 64 if (!kvm_page_track_write_tracking_enabled(kvm)) 65 return 0; 66 67 return __kvm_page_track_write_tracking_alloc(slot, npages); 68 } 69 70 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) 71 { 72 return __kvm_page_track_write_tracking_alloc(slot, slot->npages); 73 } 74 75 static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn, 76 short count) 77 { 78 int index, val; 79 80 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); 81 82 val = slot->arch.gfn_write_track[index]; 83 84 if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX)) 85 return; 86 87 slot->arch.gfn_write_track[index] += count; 88 } 89 90 void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, 91 gfn_t gfn) 92 { 93 lockdep_assert_held_write(&kvm->mmu_lock); 94 95 lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || 96 srcu_read_lock_held(&kvm->srcu)); 97 98 if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) 99 return; 100 101 update_gfn_write_track(slot, gfn, 1); 102 103 /* 104 * new track stops large page mapping for the 105 * tracked page. 106 */ 107 kvm_mmu_gfn_disallow_lpage(slot, gfn); 108 109 if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) 110 kvm_flush_remote_tlbs(kvm); 111 } 112 113 void __kvm_write_track_remove_gfn(struct kvm *kvm, 114 struct kvm_memory_slot *slot, gfn_t gfn) 115 { 116 lockdep_assert_held_write(&kvm->mmu_lock); 117 118 lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || 119 srcu_read_lock_held(&kvm->srcu)); 120 121 if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) 122 return; 123 124 update_gfn_write_track(slot, gfn, -1); 125 126 /* 127 * allow large page mapping for the tracked page 128 * after the tracker is gone. 129 */ 130 kvm_mmu_gfn_allow_lpage(slot, gfn); 131 } 132 133 /* 134 * check if the corresponding access on the specified guest page is tracked. 135 */ 136 bool kvm_gfn_is_write_tracked(struct kvm *kvm, 137 const struct kvm_memory_slot *slot, gfn_t gfn) 138 { 139 int index; 140 141 if (!slot) 142 return false; 143 144 if (!kvm_page_track_write_tracking_enabled(kvm)) 145 return false; 146 147 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); 148 return !!READ_ONCE(slot->arch.gfn_write_track[index]); 149 } 150 151 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING 152 void kvm_page_track_cleanup(struct kvm *kvm) 153 { 154 struct kvm_page_track_notifier_head *head; 155 156 head = &kvm->arch.track_notifier_head; 157 cleanup_srcu_struct(&head->track_srcu); 158 } 159 160 int kvm_page_track_init(struct kvm *kvm) 161 { 162 struct kvm_page_track_notifier_head *head; 163 164 head = &kvm->arch.track_notifier_head; 165 INIT_HLIST_HEAD(&head->track_notifier_list); 166 return init_srcu_struct(&head->track_srcu); 167 } 168 169 static int kvm_enable_external_write_tracking(struct kvm *kvm) 170 { 171 struct kvm_memslots *slots; 172 struct kvm_memory_slot *slot; 173 int r = 0, i, bkt; 174 175 mutex_lock(&kvm->slots_arch_lock); 176 177 /* 178 * Check for *any* write tracking user (not just external users) under 179 * lock. This avoids unnecessary work, e.g. if KVM itself is using 180 * write tracking, or if two external users raced when registering. 181 */ 182 if (kvm_page_track_write_tracking_enabled(kvm)) 183 goto out_success; 184 185 for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { 186 slots = __kvm_memslots(kvm, i); 187 kvm_for_each_memslot(slot, bkt, slots) { 188 /* 189 * Intentionally do NOT free allocations on failure to 190 * avoid having to track which allocations were made 191 * now versus when the memslot was created. The 192 * metadata is guaranteed to be freed when the slot is 193 * freed, and will be kept/used if userspace retries 194 * the failed ioctl() instead of killing the VM. 195 */ 196 r = kvm_page_track_write_tracking_alloc(slot); 197 if (r) 198 goto out_unlock; 199 } 200 } 201 202 out_success: 203 /* 204 * Ensure that external_write_tracking_enabled becomes true strictly 205 * after all the related pointers are set. 206 */ 207 smp_store_release(&kvm->arch.external_write_tracking_enabled, true); 208 out_unlock: 209 mutex_unlock(&kvm->slots_arch_lock); 210 return r; 211 } 212 213 /* 214 * register the notifier so that event interception for the tracked guest 215 * pages can be received. 216 */ 217 int kvm_page_track_register_notifier(struct kvm *kvm, 218 struct kvm_page_track_notifier_node *n) 219 { 220 struct kvm_page_track_notifier_head *head; 221 int r; 222 223 if (!kvm || kvm->mm != current->mm) 224 return -ESRCH; 225 226 if (!kvm_external_write_tracking_enabled(kvm)) { 227 r = kvm_enable_external_write_tracking(kvm); 228 if (r) 229 return r; 230 } 231 232 kvm_get_kvm(kvm); 233 234 head = &kvm->arch.track_notifier_head; 235 236 write_lock(&kvm->mmu_lock); 237 hlist_add_head_rcu(&n->node, &head->track_notifier_list); 238 write_unlock(&kvm->mmu_lock); 239 return 0; 240 } 241 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); 242 243 /* 244 * stop receiving the event interception. It is the opposed operation of 245 * kvm_page_track_register_notifier(). 246 */ 247 void kvm_page_track_unregister_notifier(struct kvm *kvm, 248 struct kvm_page_track_notifier_node *n) 249 { 250 struct kvm_page_track_notifier_head *head; 251 252 head = &kvm->arch.track_notifier_head; 253 254 write_lock(&kvm->mmu_lock); 255 hlist_del_rcu(&n->node); 256 write_unlock(&kvm->mmu_lock); 257 synchronize_srcu(&head->track_srcu); 258 259 kvm_put_kvm(kvm); 260 } 261 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); 262 263 /* 264 * Notify the node that write access is intercepted and write emulation is 265 * finished at this time. 266 * 267 * The node should figure out if the written page is the one that node is 268 * interested in by itself. 269 */ 270 void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes) 271 { 272 struct kvm_page_track_notifier_head *head; 273 struct kvm_page_track_notifier_node *n; 274 int idx; 275 276 head = &kvm->arch.track_notifier_head; 277 278 if (hlist_empty(&head->track_notifier_list)) 279 return; 280 281 idx = srcu_read_lock(&head->track_srcu); 282 hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, 283 srcu_read_lock_held(&head->track_srcu)) 284 if (n->track_write) 285 n->track_write(gpa, new, bytes, n); 286 srcu_read_unlock(&head->track_srcu, idx); 287 } 288 289 /* 290 * Notify external page track nodes that a memory region is being removed from 291 * the VM, e.g. so that users can free any associated metadata. 292 */ 293 void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot) 294 { 295 struct kvm_page_track_notifier_head *head; 296 struct kvm_page_track_notifier_node *n; 297 int idx; 298 299 head = &kvm->arch.track_notifier_head; 300 301 if (hlist_empty(&head->track_notifier_list)) 302 return; 303 304 idx = srcu_read_lock(&head->track_srcu); 305 hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, 306 srcu_read_lock_held(&head->track_srcu)) 307 if (n->track_remove_region) 308 n->track_remove_region(slot->base_gfn, slot->npages, n); 309 srcu_read_unlock(&head->track_srcu, idx); 310 } 311 312 /* 313 * add guest page to the tracking pool so that corresponding access on that 314 * page will be intercepted. 315 * 316 * @kvm: the guest instance we are interested in. 317 * @gfn: the guest page. 318 */ 319 int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn) 320 { 321 struct kvm_memory_slot *slot; 322 int idx; 323 324 idx = srcu_read_lock(&kvm->srcu); 325 326 slot = gfn_to_memslot(kvm, gfn); 327 if (!slot) { 328 srcu_read_unlock(&kvm->srcu, idx); 329 return -EINVAL; 330 } 331 332 write_lock(&kvm->mmu_lock); 333 __kvm_write_track_add_gfn(kvm, slot, gfn); 334 write_unlock(&kvm->mmu_lock); 335 336 srcu_read_unlock(&kvm->srcu, idx); 337 338 return 0; 339 } 340 EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn); 341 342 /* 343 * remove the guest page from the tracking pool which stops the interception 344 * of corresponding access on that page. 345 * 346 * @kvm: the guest instance we are interested in. 347 * @gfn: the guest page. 348 */ 349 int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn) 350 { 351 struct kvm_memory_slot *slot; 352 int idx; 353 354 idx = srcu_read_lock(&kvm->srcu); 355 356 slot = gfn_to_memslot(kvm, gfn); 357 if (!slot) { 358 srcu_read_unlock(&kvm->srcu, idx); 359 return -EINVAL; 360 } 361 362 write_lock(&kvm->mmu_lock); 363 __kvm_write_track_remove_gfn(kvm, slot, gfn); 364 write_unlock(&kvm->mmu_lock); 365 366 srcu_read_unlock(&kvm->srcu, idx); 367 368 return 0; 369 } 370 EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn); 371 #endif 372