// SPDX-License-Identifier: GPL-2.0
#include <linux/backing-dev.h>
#include <linux/falloc.h>
#include <linux/kvm_host.h>
#include <linux/pagemap.h>
#include <linux/anon_inodes.h>

#include "kvm_mm.h"

struct kvm_gmem {
	struct kvm *kvm;
	struct xarray bindings;
	struct list_head entry;
};

/**
 * folio_file_pfn - like folio_file_page, but return a pfn.
 * @folio: The folio which contains this index.
 * @index: The index we want to look up.
 *
 * Return: The pfn for this index.
 */
static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index)
{
	return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1));
}

static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
				    pgoff_t index, struct folio *folio)
{
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
	kvm_pfn_t pfn = folio_file_pfn(folio, index);
	gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff;
	int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio));

	if (rc) {
		pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
				    index, gfn, pfn, rc);
		return rc;
	}
#endif

	return 0;
}

static inline void kvm_gmem_mark_prepared(struct folio *folio)
{
	folio_mark_uptodate(folio);
}

/*
 * Process @folio, which contains @gfn, so that the guest can use it.
 * The folio must be locked and the gfn must be contained in @slot.
 * On successful return the guest sees a zero page (so no host data is
 * leaked) and the folio's up-to-date flag is set.
 */
static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
				  gfn_t gfn, struct folio *folio)
{
	unsigned long nr_pages, i;
	pgoff_t index;
	int r;

	nr_pages = folio_nr_pages(folio);
	for (i = 0; i < nr_pages; i++)
		clear_highpage(folio_page(folio, i));

	/*
	 * Preparing huge folios should always be safe, since it should
	 * be possible to split them later if needed.
	 *
	 * Right now the folio order is always going to be zero, but the
	 * code is ready for huge folios. The only assumption is that
	 * the base pgoff of memslots is naturally aligned with the
	 * requested page order, ensuring that huge folios can also use
	 * huge page table entries for GPA->HPA mapping.
	 *
	 * The order will be passed when creating the guest_memfd, and
	 * checked when creating memslots.
	 */
	WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio)));
	index = gfn - slot->base_gfn + slot->gmem.pgoff;
	index = ALIGN_DOWN(index, 1 << folio_order(folio));
	r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
	if (!r)
		kvm_gmem_mark_prepared(folio);

	return r;
}

/*
 * Returns a locked folio on success. The caller is responsible for
 * setting the up-to-date flag before the memory is mapped into the guest.
 * There is no backing storage for the memory, so the folio will remain
 * up-to-date until it's removed.
 *
 * Ignore accessed, referenced, and dirty flags. The memory is
 * unevictable and there is no storage to write back to.
 */
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
{
	/* TODO: Support huge pages. */
	return filemap_grab_folio(inode->i_mapping, index);
}

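/*
 * Memory in a guest_memfd created with GUEST_MEMFD_FLAG_INIT_SHARED is shared
 * with the host; otherwise it is guest-private.  Return the matching filter so
 * that invalidations only zap the type of mapping this file backs.
 */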
static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode)
{
	if ((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED)
		return KVM_FILTER_SHARED;

	return KVM_FILTER_PRIVATE;
}

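/*
 * Zap the SPTEs for every memslot bound to the range [start, end) of this
 * guest_memfd instance.  The MMU lock is taken and an MMU invalidation
 * sequence is begun only if at least one bound memslot intersects the range;
 * callers must pair this with __kvm_gmem_invalidate_end().
 */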
static void __kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
					pgoff_t end,
					enum kvm_gfn_range_filter attr_filter)
{
	bool flush = false, found_memslot = false;
	struct kvm_memory_slot *slot;
	struct kvm *kvm = gmem->kvm;
	unsigned long index;

	xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
		pgoff_t pgoff = slot->gmem.pgoff;

		struct kvm_gfn_range gfn_range = {
			.start = slot->base_gfn + max(pgoff, start) - pgoff,
			.end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff,
			.slot = slot,
			.may_block = true,
			.attr_filter = attr_filter,
		};

		if (!found_memslot) {
			found_memslot = true;

			KVM_MMU_LOCK(kvm);
			kvm_mmu_invalidate_begin(kvm);
		}

		flush |= kvm_mmu_unmap_gfn_range(kvm, &gfn_range);
	}

	if (flush)
		kvm_flush_remote_tlbs(kvm);

	if (found_memslot)
		KVM_MMU_UNLOCK(kvm);
}

static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start,
				      pgoff_t end)
{
	struct list_head *gmem_list = &inode->i_mapping->i_private_list;
	enum kvm_gfn_range_filter attr_filter;
	struct kvm_gmem *gmem;

	attr_filter = kvm_gmem_get_invalidate_filter(inode);

	list_for_each_entry(gmem, gmem_list, entry)
		__kvm_gmem_invalidate_begin(gmem, start, end, attr_filter);
}

static void __kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
				      pgoff_t end)
{
	struct kvm *kvm = gmem->kvm;

	if (xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT)) {
		KVM_MMU_LOCK(kvm);
		kvm_mmu_invalidate_end(kvm);
		KVM_MMU_UNLOCK(kvm);
	}
}

static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start,
				    pgoff_t end)
{
	struct list_head *gmem_list = &inode->i_mapping->i_private_list;
	struct kvm_gmem *gmem;

	list_for_each_entry(gmem, gmem_list, entry)
		__kvm_gmem_invalidate_end(gmem, start, end);
}

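/*
 * Handle FALLOC_FL_PUNCH_HOLE: zap any guest mappings of the range, then
 * truncate the backing folios so the memory is freed.  The invalidate
 * begin/end pair brackets the truncation so that concurrent faults cannot
 * reinstall stale mappings while the pages are being dropped.
 */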
static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	pgoff_t start = offset >> PAGE_SHIFT;
	pgoff_t end = (offset + len) >> PAGE_SHIFT;

	/*
	 * Bindings must be stable across invalidation to ensure the start+end
	 * are balanced.
	 */
	filemap_invalidate_lock(inode->i_mapping);

	kvm_gmem_invalidate_begin(inode, start, end);

	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);

	kvm_gmem_invalidate_end(inode, start, end);

	filemap_invalidate_unlock(inode->i_mapping);

	return 0;
}

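/*
 * Handle the default fallocate() mode (with FALLOC_FL_KEEP_SIZE): populate
 * the page cache for the requested range without changing the file size.
 * Allocation stops early on a pending signal or on the first failure.
 */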
static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
{
	struct address_space *mapping = inode->i_mapping;
	pgoff_t start, index, end;
	int r;

	/* The file size is fixed at creation; allocation must not extend it. */
	if (offset + len > i_size_read(inode))
		return -EINVAL;

	filemap_invalidate_lock_shared(mapping);

	start = offset >> PAGE_SHIFT;
	end = (offset + len) >> PAGE_SHIFT;

	r = 0;
	for (index = start; index < end; ) {
		struct folio *folio;

		if (signal_pending(current)) {
			r = -EINTR;
			break;
		}

		folio = kvm_gmem_get_folio(inode, index);
		if (IS_ERR(folio)) {
			r = PTR_ERR(folio);
			break;
		}

		index = folio_next_index(folio);

		folio_unlock(folio);
		folio_put(folio);

		/* 64-bit only, wrapping the index should be impossible. */
		if (WARN_ON_ONCE(!index))
			break;

		cond_resched();
	}

	filemap_invalidate_unlock_shared(mapping);

	return r;
}

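/*
 * Only two fallocate() modes are supported: FALLOC_FL_KEEP_SIZE preallocates
 * pages for the range, and FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE frees
 * them.  Both offset and length must be page-aligned.
 */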
static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
			       loff_t len)
{
	int ret;

	if (!(mode & FALLOC_FL_KEEP_SIZE))
		return -EOPNOTSUPP;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
		return -EINVAL;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		ret = kvm_gmem_punch_hole(file_inode(file), offset, len);
	else
		ret = kvm_gmem_allocate(file_inode(file), offset, len);

	if (!ret)
		file_modified(file);
	return ret;
}

static int kvm_gmem_release(struct inode *inode, struct file *file)
{
	struct kvm_gmem *gmem = file->private_data;
	struct kvm_memory_slot *slot;
	struct kvm *kvm = gmem->kvm;
	unsigned long index;

	/*
	 * Prevent concurrent attempts to *unbind* a memslot. This is the last
	 * reference to the file and thus no new bindings can be created, but
	 * dereferencing the slot for existing bindings needs to be protected
	 * against memslot updates, specifically so that unbind doesn't race
	 * and free the memslot (kvm_gmem_get_file() will return NULL).
	 *
	 * Since .release is called only when the reference count is zero,
	 * after which file_ref_get() and get_file_active() fail,
	 * kvm_gmem_get_pfn() cannot be using the file concurrently.
	 * file_ref_put() provides a full barrier, and get_file_active() the
	 * matching acquire barrier.
	 */
	mutex_lock(&kvm->slots_lock);

	filemap_invalidate_lock(inode->i_mapping);

	xa_for_each(&gmem->bindings, index, slot)
		WRITE_ONCE(slot->gmem.file, NULL);

	/*
	 * All in-flight operations are gone and new bindings can be created.
	 * Zap all SPTEs pointed at by this file. Do not free the backing
	 * memory, as its lifetime is associated with the inode, not the file.
	 */
	__kvm_gmem_invalidate_begin(gmem, 0, -1ul,
				    kvm_gmem_get_invalidate_filter(inode));
	__kvm_gmem_invalidate_end(gmem, 0, -1ul);

	list_del(&gmem->entry);

	filemap_invalidate_unlock(inode->i_mapping);

	mutex_unlock(&kvm->slots_lock);

	xa_destroy(&gmem->bindings);
	kfree(gmem);

	kvm_put_kvm(kvm);

	return 0;
}

static inline struct file *kvm_gmem_get_file(struct kvm_memory_slot *slot)
{
	/*
	 * Do not return slot->gmem.file if it has already been closed;
	 * there might be some time between the last fput() and when
	 * kvm_gmem_release() clears slot->gmem.file.
	 */
	return get_file_active(&slot->gmem.file);
}

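/* Translate a gfn in @slot into its page offset within the bound guest_memfd. */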
static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
{
	return gfn - slot->base_gfn + slot->gmem.pgoff;
}

static bool kvm_gmem_supports_mmap(struct inode *inode)
{
	const u64 flags = (u64)inode->i_private;

	return flags & GUEST_MEMFD_FLAG_MMAP;
}

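/*
 * Fault handler for userspace (mmap) access to guest_memfd.  Faults are
 * rejected with SIGBUS if they are beyond the end of the file, if the file
 * was not created with GUEST_MEMFD_FLAG_INIT_SHARED, or if an unexpectedly
 * large folio is found.  On first touch the page is zeroed and marked
 * up-to-date, and the locked folio is handed back to the fault core.
 */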
static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct folio *folio;
	vm_fault_t ret = VM_FAULT_LOCKED;

	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
		return VM_FAULT_SIGBUS;

	if (!((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED))
		return VM_FAULT_SIGBUS;

	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
	if (IS_ERR(folio)) {
		int err = PTR_ERR(folio);

		if (err == -EAGAIN)
			return VM_FAULT_RETRY;

		return vmf_error(err);
	}

	if (WARN_ON_ONCE(folio_test_large(folio))) {
		ret = VM_FAULT_SIGBUS;
		goto out_folio;
	}

	if (!folio_test_uptodate(folio)) {
		clear_highpage(folio_page(folio, 0));
		kvm_gmem_mark_prepared(folio);
	}

	vmf->page = folio_file_page(folio, vmf->pgoff);

out_folio:
	if (ret != VM_FAULT_LOCKED) {
		folio_unlock(folio);
		folio_put(folio);
	}

	return ret;
}

static const struct vm_operations_struct kvm_gmem_vm_ops = {
	.fault = kvm_gmem_fault_user_mapping,
};

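/*
 * mmap() is allowed only if the guest_memfd was created with
 * GUEST_MEMFD_FLAG_MMAP, and only for shared (MAP_SHARED) mappings.
 */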
static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!kvm_gmem_supports_mmap(file_inode(file)))
		return -ENODEV;

	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
	    (VM_SHARED | VM_MAYSHARE)) {
		return -EINVAL;
	}

	vma->vm_ops = &kvm_gmem_vm_ops;

	return 0;
}

static struct file_operations kvm_gmem_fops = {
	.mmap = kvm_gmem_mmap,
	.open = generic_file_open,
	.release = kvm_gmem_release,
	.fallocate = kvm_gmem_fallocate,
};

void kvm_gmem_init(struct module *module)
{
	kvm_gmem_fops.owner = module;
}

static int kvm_gmem_migrate_folio(struct address_space *mapping,
				  struct folio *dst, struct folio *src,
				  enum migrate_mode mode)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}

static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio)
{
	pgoff_t start, end;

	filemap_invalidate_lock_shared(mapping);

	start = folio->index;
	end = start + folio_nr_pages(folio);

	kvm_gmem_invalidate_begin(mapping->host, start, end);

	/*
	 * Do not truncate the range; what action is taken in response to the
	 * error is userspace's decision (assuming the architecture supports
	 * gracefully handling memory errors). If/when the guest attempts to
	 * access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
	 * at which point KVM can either terminate the VM or propagate the
	 * error to userspace.
	 */

	kvm_gmem_invalidate_end(mapping->host, start, end);

	filemap_invalidate_unlock_shared(mapping);

	return MF_DELAYED;
}

#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
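/*
 * Notify the architecture that the pfn range backing @folio is being freed,
 * so any architecture-specific state tied to the pages can be torn down.
 */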
static void kvm_gmem_free_folio(struct folio *folio)
{
	struct page *page = folio_page(folio, 0);
	kvm_pfn_t pfn = page_to_pfn(page);
	int order = folio_order(folio);

	kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
}
#endif

static const struct address_space_operations kvm_gmem_aops = {
	.dirty_folio = noop_dirty_folio,
	.migrate_folio = kvm_gmem_migrate_folio,
	.error_remove_folio = kvm_gmem_error_folio,
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
	.free_folio = kvm_gmem_free_folio,
#endif
};

static int kvm_gmem_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
			    struct iattr *attr)
{
	return -EINVAL;
}

static const struct inode_operations kvm_gmem_iops = {
	.setattr = kvm_gmem_setattr,
};

bool __weak kvm_arch_supports_gmem_init_shared(struct kvm *kvm)
{
	return true;
}

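/*
 * Allocate the kvm_gmem instance and the anonymous inode/file that back a
 * guest_memfd, configure the inode (size, flags, address_space ops), tie the
 * file to the VM, and return a new file descriptor for it.
 */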
static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
{
	const char *anon_name = "[kvm-gmem]";
	struct kvm_gmem *gmem;
	struct inode *inode;
	struct file *file;
	int fd, err;

	fd = get_unused_fd_flags(0);
	if (fd < 0)
		return fd;

	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
	if (!gmem) {
		err = -ENOMEM;
		goto err_fd;
	}

	file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
					 O_RDWR, NULL);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto err_gmem;
	}

	file->f_flags |= O_LARGEFILE;

	inode = file->f_inode;
	WARN_ON(file->f_mapping != inode->i_mapping);

	inode->i_private = (void *)(unsigned long)flags;
	inode->i_op = &kvm_gmem_iops;
	inode->i_mapping->a_ops = &kvm_gmem_aops;
	inode->i_mode |= S_IFREG;
	inode->i_size = size;
	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
	mapping_set_inaccessible(inode->i_mapping);
	/* Unmovable mappings are supposed to be marked unevictable as well. */
	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));

	kvm_get_kvm(kvm);
	gmem->kvm = kvm;
	xa_init(&gmem->bindings);
	list_add(&gmem->entry, &inode->i_mapping->i_private_list);

	fd_install(fd, file);
	return fd;

err_gmem:
	kfree(gmem);
err_fd:
	put_unused_fd(fd);
	return err;
}

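/*
 * Handler for the KVM_CREATE_GUEST_MEMFD ioctl: validate the requested flags
 * and size (non-zero, page-aligned), then create the guest_memfd file.
 */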
int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
{
	loff_t size = args->size;
	u64 flags = args->flags;

	if (flags & ~kvm_gmem_get_supported_flags(kvm))
		return -EINVAL;

	if (size <= 0 || !PAGE_ALIGNED(size))
		return -EINVAL;

	return __kvm_gmem_create(kvm, size, flags);
}

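/*
 * Bind a guest_memfd range to a new memslot: verify that the fd refers to a
 * guest_memfd belonging to @kvm and that the offset/size fit within the file,
 * reject overlaps with existing bindings, then record the binding in the
 * bindings xarray so invalidations can find the memslot.
 */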
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
		  unsigned int fd, loff_t offset)
{
	loff_t size = slot->npages << PAGE_SHIFT;
	unsigned long start, end;
	struct kvm_gmem *gmem;
	struct inode *inode;
	struct file *file;
	int r = -EINVAL;

	BUILD_BUG_ON(sizeof(gfn_t) != sizeof(slot->gmem.pgoff));

	file = fget(fd);
	if (!file)
		return -EBADF;

	if (file->f_op != &kvm_gmem_fops)
		goto err;

	gmem = file->private_data;
	if (gmem->kvm != kvm)
		goto err;

	inode = file_inode(file);

	if (offset < 0 || !PAGE_ALIGNED(offset) ||
	    offset + size > i_size_read(inode))
		goto err;

	filemap_invalidate_lock(inode->i_mapping);

	start = offset >> PAGE_SHIFT;
	end = start + slot->npages;

	if (!xa_empty(&gmem->bindings) &&
	    xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT)) {
		filemap_invalidate_unlock(inode->i_mapping);
		goto err;
	}

	/*
	 * Memslots with the KVM_MEM_GUEST_MEMFD flag are immutable once
	 * created, so kvm_gmem_bind() must occur on a new memslot.  Because
	 * the memslot is not visible yet, kvm_gmem_get_pfn() is guaranteed
	 * to see the file.
	 */
	WRITE_ONCE(slot->gmem.file, file);
	slot->gmem.pgoff = start;
	if (kvm_gmem_supports_mmap(inode))
		slot->flags |= KVM_MEMSLOT_GMEM_ONLY;

	xa_store_range(&gmem->bindings, start, end - 1, slot, GFP_KERNEL);
	filemap_invalidate_unlock(inode->i_mapping);

	/*
	 * Drop the reference to the file, even on success. The file pins KVM,
	 * not the other way 'round. Active bindings are invalidated if the
	 * file is closed before memslots are destroyed.
	 */
	r = 0;
err:
	fput(file);
	return r;
}

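/*
 * Undo kvm_gmem_bind() for a memslot that is being destroyed: clear the
 * binding range from the xarray and NULL out slot->gmem.file.  If the file
 * was already closed there is nothing to do, since kvm_gmem_release()
 * invalidates all bindings.
 */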
void kvm_gmem_unbind(struct kvm_memory_slot *slot)
{
	unsigned long start = slot->gmem.pgoff;
	unsigned long end = start + slot->npages;
	struct kvm_gmem *gmem;
	struct file *file;

	/*
	 * Nothing to do if the underlying file was already closed (or is being
	 * closed right now), as kvm_gmem_release() invalidates all bindings.
	 */
	file = kvm_gmem_get_file(slot);
	if (!file)
		return;

	gmem = file->private_data;

	filemap_invalidate_lock(file->f_mapping);
	xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL);

	/*
	 * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn()
	 * cannot see this memslot.
	 */
	WRITE_ONCE(slot->gmem.file, NULL);
	filemap_invalidate_unlock(file->f_mapping);

	fput(file);
}

/* Returns a locked folio on success. */
static struct folio *__kvm_gmem_get_pfn(struct file *file,
					struct kvm_memory_slot *slot,
					pgoff_t index, kvm_pfn_t *pfn,
					bool *is_prepared, int *max_order)
{
	struct file *gmem_file = READ_ONCE(slot->gmem.file);
	struct kvm_gmem *gmem = file->private_data;
	struct folio *folio;

	if (file != gmem_file) {
		WARN_ON_ONCE(gmem_file);
		return ERR_PTR(-EFAULT);
	}

	if (xa_load(&gmem->bindings, index) != slot) {
		WARN_ON_ONCE(xa_load(&gmem->bindings, index));
		return ERR_PTR(-EIO);
	}

	folio = kvm_gmem_get_folio(file_inode(file), index);
	if (IS_ERR(folio))
		return folio;

	if (folio_test_hwpoison(folio)) {
		folio_unlock(folio);
		folio_put(folio);
		return ERR_PTR(-EHWPOISON);
	}

	*pfn = folio_file_pfn(folio, index);
	if (max_order)
		*max_order = 0;

	*is_prepared = folio_test_uptodate(folio);
	return folio;
}

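/*
 * Resolve @gfn in @slot to the pfn and page backing it in guest_memfd.  On
 * success a reference to the page is held (released via the returned @page)
 * and the folio has been prepared for guest use; a negative errno is returned
 * if the slot is no longer backed by the file, the folio is HWPOISONed, or
 * preparation fails.
 *
 * A minimal, hypothetical usage sketch (not taken from any in-tree caller;
 * an arch fault path may use different locals and error handling):
 *
 *	kvm_pfn_t pfn;
 *	struct page *page;
 *	int r = kvm_gmem_get_pfn(kvm, slot, gfn, &pfn, &page, NULL);
 *
 *	if (r)
 *		return r;
 *	...map @pfn into the guest...
 *	kvm_release_page_clean(page);
 */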
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
		     gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
		     int *max_order)
{
	pgoff_t index = kvm_gmem_get_index(slot, gfn);
	struct file *file = kvm_gmem_get_file(slot);
	struct folio *folio;
	bool is_prepared = false;
	int r = 0;

	if (!file)
		return -EFAULT;

	folio = __kvm_gmem_get_pfn(file, slot, index, pfn, &is_prepared, max_order);
	if (IS_ERR(folio)) {
		r = PTR_ERR(folio);
		goto out;
	}

	if (!is_prepared)
		r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);

	folio_unlock(folio);

	if (!r)
		*page = folio_file_page(folio, index);
	else
		folio_put(folio);

out:
	fput(file);
	return r;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn);

#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE
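/*
 * Populate up to @npages pages of guest_memfd starting at @start_gfn and hand
 * each one to @post_populate (e.g. so the architecture can copy in and encrypt
 * the initial guest payload from @src).  Pages that are already prepared are
 * rejected with -EEXIST.  Returns the number of pages processed, or a negative
 * errno if the very first page fails.
 */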
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
		       kvm_gmem_populate_cb post_populate, void *opaque)
{
	struct file *file;
	struct kvm_memory_slot *slot;
	void __user *p;

	int ret = 0, max_order;
	long i;

	lockdep_assert_held(&kvm->slots_lock);

	if (WARN_ON_ONCE(npages <= 0))
		return -EINVAL;

	slot = gfn_to_memslot(kvm, start_gfn);
	if (!kvm_slot_has_gmem(slot))
		return -EINVAL;

	file = kvm_gmem_get_file(slot);
	if (!file)
		return -EFAULT;

	filemap_invalidate_lock(file->f_mapping);

	npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
	for (i = 0; i < npages; i += (1 << max_order)) {
		struct folio *folio;
		gfn_t gfn = start_gfn + i;
		pgoff_t index = kvm_gmem_get_index(slot, gfn);
		bool is_prepared = false;
		kvm_pfn_t pfn;

		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, &is_prepared, &max_order);
		if (IS_ERR(folio)) {
			ret = PTR_ERR(folio);
			break;
		}

		if (is_prepared) {
			folio_unlock(folio);
			folio_put(folio);
			ret = -EEXIST;
			break;
		}

		folio_unlock(folio);
		WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) ||
			(npages - i) < (1 << max_order));

		ret = -EINVAL;
		while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order),
							KVM_MEMORY_ATTRIBUTE_PRIVATE,
							KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
			if (!max_order)
				goto put_folio_and_exit;
			max_order--;
		}

		p = src ? src + i * PAGE_SIZE : NULL;
		ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
		if (!ret)
			kvm_gmem_mark_prepared(folio);

put_folio_and_exit:
		folio_put(folio);
		if (ret)
			break;
	}

	filemap_invalidate_unlock(file->f_mapping);

	fput(file);
	return ret && !i ? ret : i;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_populate);
#endif