xref: /linux/mm/truncate.c (revision 3ba84ac69b53e6ee07c31d54554e00793d7b144f)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * mm/truncate.c - code for taking down pages from address_spaces
4   *
5   * Copyright (C) 2002, Linus Torvalds
6   *
7   * 10Sep2002	Andrew Morton
8   *		Initial version.
9   */
10  
11  #include <linux/kernel.h>
12  #include <linux/backing-dev.h>
13  #include <linux/dax.h>
14  #include <linux/gfp.h>
15  #include <linux/mm.h>
16  #include <linux/swap.h>
17  #include <linux/export.h>
18  #include <linux/pagemap.h>
19  #include <linux/highmem.h>
20  #include <linux/pagevec.h>
21  #include <linux/task_io_accounting_ops.h>
22  #include <linux/shmem_fs.h>
23  #include <linux/rmap.h>
24  #include "internal.h"
25  
26  /*
27   * Regular page slots are stabilized by the page lock even without the tree
28   * itself locked.  These unlocked entries need verification under the tree
29   * lock.
30   */
31  static inline void __clear_shadow_entry(struct address_space *mapping,
32  				pgoff_t index, void *entry)
33  {
34  	XA_STATE(xas, &mapping->i_pages, index);
35  
36  	xas_set_update(&xas, workingset_update_node);
37  	if (xas_load(&xas) != entry)
38  		return;
39  	xas_store(&xas, NULL);
40  }
41  
42  static void clear_shadow_entries(struct address_space *mapping,
43  				 struct folio_batch *fbatch, pgoff_t *indices)
44  {
45  	int i;
46  
47  	/* Handled by shmem itself, or for DAX we do nothing. */
48  	if (shmem_mapping(mapping) || dax_mapping(mapping))
49  		return;
50  
51  	spin_lock(&mapping->host->i_lock);
52  	xa_lock_irq(&mapping->i_pages);
53  
54  	for (i = 0; i < folio_batch_count(fbatch); i++) {
55  		struct folio *folio = fbatch->folios[i];
56  
57  		if (xa_is_value(folio))
58  			__clear_shadow_entry(mapping, indices[i], folio);
59  	}
60  
61  	xa_unlock_irq(&mapping->i_pages);
62  	if (mapping_shrinkable(mapping))
63  		inode_add_lru(mapping->host);
64  	spin_unlock(&mapping->host->i_lock);
65  }
66  
67  /*
68   * Unconditionally remove exceptional entries. Usually called from truncate
69   * path. Note that the folio_batch may be altered by this function by removing
70   * exceptional entries similar to what folio_batch_remove_exceptionals() does.
71   */
72  static void truncate_folio_batch_exceptionals(struct address_space *mapping,
73  				struct folio_batch *fbatch, pgoff_t *indices)
74  {
75  	int i, j;
76  	bool dax;
77  
78  	/* Handled by shmem itself */
79  	if (shmem_mapping(mapping))
80  		return;
81  
82  	for (j = 0; j < folio_batch_count(fbatch); j++)
83  		if (xa_is_value(fbatch->folios[j]))
84  			break;
85  
86  	if (j == folio_batch_count(fbatch))
87  		return;
88  
89  	dax = dax_mapping(mapping);
90  	if (!dax) {
91  		spin_lock(&mapping->host->i_lock);
92  		xa_lock_irq(&mapping->i_pages);
93  	}
94  
95  	for (i = j; i < folio_batch_count(fbatch); i++) {
96  		struct folio *folio = fbatch->folios[i];
97  		pgoff_t index = indices[i];
98  
99  		if (!xa_is_value(folio)) {
100  			fbatch->folios[j++] = folio;
101  			continue;
102  		}
103  
104  		if (unlikely(dax)) {
105  			dax_delete_mapping_entry(mapping, index);
106  			continue;
107  		}
108  
109  		__clear_shadow_entry(mapping, index, folio);
110  	}
111  
112  	if (!dax) {
113  		xa_unlock_irq(&mapping->i_pages);
114  		if (mapping_shrinkable(mapping))
115  			inode_add_lru(mapping->host);
116  		spin_unlock(&mapping->host->i_lock);
117  	}
118  	fbatch->nr = j;
119  }
120  
121  /**
122   * folio_invalidate - Invalidate part or all of a folio.
123   * @folio: The folio which is affected.
124   * @offset: start of the range to invalidate
125   * @length: length of the range to invalidate
126   *
127   * folio_invalidate() is called when all or part of the folio has become
128   * invalidated by a truncate operation.
129   *
130   * folio_invalidate() does not have to release all buffers, but it must
131   * ensure that no dirty buffer is left outside @offset and that no I/O
132   * is underway against any of the blocks which are outside the truncation
133   * point, because the caller is about to free (and possibly reuse) those
134   * blocks on-disk.
135   */
136  void folio_invalidate(struct folio *folio, size_t offset, size_t length)
137  {
138  	const struct address_space_operations *aops = folio->mapping->a_ops;
139  
140  	if (aops->invalidate_folio)
141  		aops->invalidate_folio(folio, offset, length);
142  }
143  EXPORT_SYMBOL_GPL(folio_invalidate);
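
/*
 * Illustrative sketch (editorial addition, not part of this file):
 * folio_invalidate() only dispatches to the mapping's ->invalidate_folio
 * handler, so a block-based filesystem typically just wires that handler up
 * in its address_space_operations, e.g. to the buffer-head helper from
 * fs/buffer.c:
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.invalidate_folio	= block_invalidate_folio,
 *	};
 *
 * (myfs_aops is a hypothetical name used only for illustration.)
 */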
144  
145  /*
146   * If truncate cannot remove the fs-private metadata from the page, the page
147   * becomes orphaned.  It will be left on the LRU and may even be mapped into
148   * user pagetables if we're racing with filemap_fault().
149   *
150   * We need to bail out if page->mapping is no longer equal to the original
151   * mapping.  This happens a) when the VM reclaimed the page while we waited on
152   * its lock, b) when a concurrent invalidate_mapping_pages got there first and
153   * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
154   */
155  static void truncate_cleanup_folio(struct folio *folio)
156  {
157  	if (folio_mapped(folio))
158  		unmap_mapping_folio(folio);
159  
160  	if (folio_needs_release(folio))
161  		folio_invalidate(folio, 0, folio_size(folio));
162  
163  	/*
164  	 * Some filesystems seem to re-dirty the page even after
165  	 * the VM has canceled the dirty bit (eg ext3 journaling).
166  	 * Hence dirty accounting check is placed after invalidation.
167  	 */
168  	folio_cancel_dirty(folio);
169  	folio_clear_mappedtodisk(folio);
170  }
171  
172  int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
173  {
174  	if (folio->mapping != mapping)
175  		return -EIO;
176  
177  	truncate_cleanup_folio(folio);
178  	filemap_remove_folio(folio);
179  	return 0;
180  }
181  
182  /*
183   * Handle partial folios.  The folio may be entirely within the
184   * range if a split has raced with us.  If not, we zero the part of the
185   * folio that's within the [start, end] range, and then split the folio if
186   * it's large.  split_folio() will discard pages which now lie beyond
187   * i_size, and we rely on the caller to discard pages which lie within a
188   * newly created hole.
189   *
190   * Returns false if splitting failed so the caller can avoid
191   * discarding the entire folio which is stubbornly unsplit.
192   */
193  bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
194  {
195  	loff_t pos = folio_pos(folio);
196  	unsigned int offset, length;
197  
198  	if (pos < start)
199  		offset = start - pos;
200  	else
201  		offset = 0;
202  	length = folio_size(folio);
203  	if (pos + length <= (u64)end)
204  		length = length - offset;
205  	else
206  		length = end + 1 - pos - offset;
207  
208  	folio_wait_writeback(folio);
209  	if (length == folio_size(folio)) {
210  		truncate_inode_folio(folio->mapping, folio);
211  		return true;
212  	}
213  
214  	/*
215  	 * We may be zeroing pages we're about to discard, but it avoids
216  	 * doing a complex calculation here, and then doing the zeroing
217  	 * anyway if the page split fails.
218  	 */
219  	if (!mapping_inaccessible(folio->mapping))
220  		folio_zero_range(folio, offset, length);
221  
222  	if (folio_needs_release(folio))
223  		folio_invalidate(folio, offset, length);
224  	if (!folio_test_large(folio))
225  		return true;
226  	if (split_folio(folio) == 0)
227  		return true;
228  	if (folio_test_dirty(folio))
229  		return false;
230  	truncate_inode_folio(folio->mapping, folio);
231  	return true;
232  }
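
/*
 * Worked example (editorial addition, not part of this file): truncating a
 * file to 10000 bytes (lstart == 10000, lend == -1) when a 16KiB folio sits
 * at pos 8192 gives offset == 1808 and length == 14576, so bytes
 * [10000, 24576) are zeroed within the folio and the folio is then split if
 * it is large.
 */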
233  
234  /*
235   * Used to get rid of pages on hardware memory corruption.
236   */
237  int generic_error_remove_folio(struct address_space *mapping,
238  		struct folio *folio)
239  {
240  	if (!mapping)
241  		return -EINVAL;
242  	/*
243  	 * Only punch for normal data pages for now.
244  	 * Handling other types like directories would need more auditing.
245  	 */
246  	if (!S_ISREG(mapping->host->i_mode))
247  		return -EIO;
248  	return truncate_inode_folio(mapping, folio);
249  }
250  EXPORT_SYMBOL(generic_error_remove_folio);
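
/*
 * Illustrative sketch (editorial addition, not part of this file):
 * filesystems that can safely drop poisoned pagecache typically point the
 * memory-failure hook of their address_space_operations at this helper:
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.error_remove_folio	= generic_error_remove_folio,
 *	};
 *
 * (myfs_aops is a hypothetical name used only for illustration.)
 */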
251  
252  /**
253   * mapping_evict_folio() - Remove an unused folio from the page-cache.
254   * @mapping: The mapping this folio belongs to.
255   * @folio: The folio to remove.
256   *
257   * Safely remove one folio from the page cache.
258   * It only drops clean, unused folios.
259   *
260   * Context: Folio must be locked.
261   * Return: The number of pages successfully removed.
262   */
263  long mapping_evict_folio(struct address_space *mapping, struct folio *folio)
264  {
265  	/* The page may have been truncated before it was locked */
266  	if (!mapping)
267  		return 0;
268  	if (folio_test_dirty(folio) || folio_test_writeback(folio))
269  		return 0;
270  	/* The refcount will be elevated if any page in the folio is mapped */
271  	if (folio_ref_count(folio) >
272  			folio_nr_pages(folio) + folio_has_private(folio) + 1)
273  		return 0;
274  	if (!filemap_release_folio(folio, 0))
275  		return 0;
276  
277  	return remove_mapping(mapping, folio);
278  }
279  
280  /**
281   * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
282   * @mapping: mapping to truncate
283   * @lstart: offset from which to truncate
284   * @lend: offset to which to truncate (inclusive)
285   *
286   * Truncate the page cache, removing the pages that are between
287   * specified offsets (and zeroing out partial pages
288   * if lstart or lend + 1 is not page aligned).
289   *
290   * Truncate takes two passes - the first pass is nonblocking.  It will not
291   * block on page locks and it will not block on writeback.  The second pass
292   * will wait.  This is to prevent as much IO as possible in the affected region.
293   * The first pass will remove most pages, so the search cost of the second pass
294   * is low.
295   *
296   * We pass down the cache-hot hint to the page freeing code.  Even if the
297   * mapping is large, it is probably the case that the final pages are the most
298   * recently touched, and freeing happens in ascending file offset order.
299   *
300   * Note that since ->invalidate_folio() accepts a range to invalidate,
301   * truncate_inode_pages_range is able to handle cases where lend + 1 is not
302   * properly page aligned.
303   */
304  void truncate_inode_pages_range(struct address_space *mapping,
305  				loff_t lstart, loff_t lend)
306  {
307  	pgoff_t		start;		/* inclusive */
308  	pgoff_t		end;		/* exclusive */
309  	struct folio_batch fbatch;
310  	pgoff_t		indices[PAGEVEC_SIZE];
311  	pgoff_t		index;
312  	int		i;
313  	struct folio	*folio;
314  	bool		same_folio;
315  
316  	if (mapping_empty(mapping))
317  		return;
318  
319  	/*
320  	 * 'start' and 'end' always cover the range of pages to be fully
321  	 * truncated. Partial folios at the start and end of the range are
322  	 * handled separately by truncate_inode_partial_folio() below.
323  	 * Note that 'end' is exclusive while 'lend' is inclusive.
324  	 */
325  	start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
326  	if (lend == -1)
327  		/*
328  		 * lend == -1 indicates end-of-file, so we have to set 'end'
329  		 * to the highest possible pgoff_t; since the type is
330  		 * unsigned we're using -1.
331  		 */
332  		end = -1;
333  	else
334  		end = (lend + 1) >> PAGE_SHIFT;
335  
336  	folio_batch_init(&fbatch);
337  	index = start;
338  	while (index < end && find_lock_entries(mapping, &index, end - 1,
339  			&fbatch, indices)) {
340  		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
341  		for (i = 0; i < folio_batch_count(&fbatch); i++)
342  			truncate_cleanup_folio(fbatch.folios[i]);
343  		delete_from_page_cache_batch(mapping, &fbatch);
344  		for (i = 0; i < folio_batch_count(&fbatch); i++)
345  			folio_unlock(fbatch.folios[i]);
346  		folio_batch_release(&fbatch);
347  		cond_resched();
348  	}
349  
350  	same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
351  	folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0);
352  	if (!IS_ERR(folio)) {
353  		same_folio = lend < folio_pos(folio) + folio_size(folio);
354  		if (!truncate_inode_partial_folio(folio, lstart, lend)) {
355  			start = folio_next_index(folio);
356  			if (same_folio)
357  				end = folio->index;
358  		}
359  		folio_unlock(folio);
360  		folio_put(folio);
361  		folio = NULL;
362  	}
363  
364  	if (!same_folio) {
365  		folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT,
366  						FGP_LOCK, 0);
367  		if (!IS_ERR(folio)) {
368  			if (!truncate_inode_partial_folio(folio, lstart, lend))
369  				end = folio->index;
370  			folio_unlock(folio);
371  			folio_put(folio);
372  		}
373  	}
374  
375  	index = start;
376  	while (index < end) {
377  		cond_resched();
378  		if (!find_get_entries(mapping, &index, end - 1, &fbatch,
379  				indices)) {
380  			/* If all gone from start onwards, we're done */
381  			if (index == start)
382  				break;
383  			/* Otherwise restart to make sure all gone */
384  			index = start;
385  			continue;
386  		}
387  
388  		for (i = 0; i < folio_batch_count(&fbatch); i++) {
389  			struct folio *folio = fbatch.folios[i];
390  
391  			/* We rely upon deletion not changing folio->index */
392  
393  			if (xa_is_value(folio))
394  				continue;
395  
396  			folio_lock(folio);
397  			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
398  			folio_wait_writeback(folio);
399  			truncate_inode_folio(mapping, folio);
400  			folio_unlock(folio);
401  		}
402  		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
403  		folio_batch_release(&fbatch);
404  	}
405  }
406  EXPORT_SYMBOL(truncate_inode_pages_range);
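
/*
 * Illustrative sketch (editorial addition, not part of this file): @lend is
 * inclusive and -1 means "to end of file", so dropping everything from byte
 * 4096 onwards looks like:
 *
 *	truncate_inode_pages_range(inode->i_mapping, 4096, (loff_t)-1);
 *
 * which is exactly what truncate_inode_pages(mapping, 4096) below expands to.
 */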
407  
408  /**
409   * truncate_inode_pages - truncate *all* the pages from an offset
410   * @mapping: mapping to truncate
411   * @lstart: offset from which to truncate
412   *
413   * Called under (and serialised by) inode->i_rwsem and
414   * mapping->invalidate_lock.
415   *
416   * Note: When this function returns, there can be a page in the process of
417   * deletion (inside __filemap_remove_folio()) in the specified range.  Thus
418   * mapping->nrpages can be non-zero when this function returns even after
419   * truncation of the whole mapping.
420   */
421  void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
422  {
423  	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
424  }
425  EXPORT_SYMBOL(truncate_inode_pages);
426  
427  /**
428   * truncate_inode_pages_final - truncate *all* pages before inode dies
429   * @mapping: mapping to truncate
430   *
431   * Called under (and serialized by) inode->i_rwsem.
432   *
433   * Filesystems have to use this in the .evict_inode path to inform the
434   * VM that this is the final truncate and the inode is going away.
435   */
436  void truncate_inode_pages_final(struct address_space *mapping)
437  {
438  	/*
439  	 * Page reclaim can not participate in regular inode lifetime
440  	 * management (can't call iput()) and thus can race with the
441  	 * inode teardown.  Tell it when the address space is exiting,
442  	 * so that it does not install eviction information after the
443  	 * final truncate has begun.
444  	 */
445  	mapping_set_exiting(mapping);
446  
447  	if (!mapping_empty(mapping)) {
448  		/*
449  		 * As truncation uses a lockless tree lookup, cycle
450  		 * the tree lock to make sure any ongoing tree
451  		 * modification that does not see AS_EXITING is
452  		 * completed before starting the final truncate.
453  		 */
454  		xa_lock_irq(&mapping->i_pages);
455  		xa_unlock_irq(&mapping->i_pages);
456  	}
457  
458  	truncate_inode_pages(mapping, 0);
459  }
460  EXPORT_SYMBOL(truncate_inode_pages_final);
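
/*
 * Illustrative sketch (editorial addition, not part of this file): a simple
 * filesystem's ->evict_inode() commonly boils down to the following
 * (myfs_evict_inode is a hypothetical name):
 *
 *	static void myfs_evict_inode(struct inode *inode)
 *	{
 *		truncate_inode_pages_final(&inode->i_data);
 *		clear_inode(inode);
 *	}
 *
 * Filesystems with more state tear it down between the two calls.
 */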
461  
462  /**
463   * mapping_try_invalidate - Invalidate all the evictable folios of one inode
464   * @mapping: the address_space which holds the folios to invalidate
465   * @start: the offset 'from' which to invalidate
466   * @end: the offset 'to' which to invalidate (inclusive)
467   * @nr_failed: How many folio invalidations failed
468   *
469   * This function is similar to invalidate_mapping_pages(), except that it
470   * returns the number of folios which could not be evicted in @nr_failed.
471   */
472  unsigned long mapping_try_invalidate(struct address_space *mapping,
473  		pgoff_t start, pgoff_t end, unsigned long *nr_failed)
474  {
475  	pgoff_t indices[PAGEVEC_SIZE];
476  	struct folio_batch fbatch;
477  	pgoff_t index = start;
478  	unsigned long ret;
479  	unsigned long count = 0;
480  	int i;
481  	bool xa_has_values = false;
482  
483  	folio_batch_init(&fbatch);
484  	while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
485  		for (i = 0; i < folio_batch_count(&fbatch); i++) {
486  			struct folio *folio = fbatch.folios[i];
487  
488  			/* We rely upon deletion not changing folio->index */
489  
490  			if (xa_is_value(folio)) {
491  				xa_has_values = true;
492  				count++;
493  				continue;
494  			}
495  
496  			ret = mapping_evict_folio(mapping, folio);
497  			folio_unlock(folio);
498  			/*
499  			 * Invalidation is a hint that the folio is no longer
500  			 * of interest, so try to speed up its reclaim.
501  			 */
502  			if (!ret) {
503  				deactivate_file_folio(folio);
504  				/* Likely in the lru cache of a remote CPU */
505  				if (nr_failed)
506  					(*nr_failed)++;
507  			}
508  			count += ret;
509  		}
510  
511  		if (xa_has_values)
512  			clear_shadow_entries(mapping, &fbatch, indices);
513  
514  		folio_batch_remove_exceptionals(&fbatch);
515  		folio_batch_release(&fbatch);
516  		cond_resched();
517  	}
518  	return count;
519  }
520  
521  /**
522   * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode
523   * @mapping: the address_space which holds the cache to invalidate
524   * @start: the offset 'from' which to invalidate
525   * @end: the offset 'to' which to invalidate (inclusive)
526   *
527   * This function removes pages that are clean, unmapped and unlocked,
528   * as well as shadow entries. It will not block on IO activity.
529   *
530   * If you want to remove all the pages of one inode, regardless of
531   * their use and writeback state, use truncate_inode_pages().
532   *
533   * Return: The number of indices that had their contents invalidated
534   */
535  unsigned long invalidate_mapping_pages(struct address_space *mapping,
536  		pgoff_t start, pgoff_t end)
537  {
538  	return mapping_try_invalidate(mapping, start, end, NULL);
539  }
540  EXPORT_SYMBOL(invalidate_mapping_pages);
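
/*
 * Illustrative sketch (editorial addition, not part of this file): callers
 * use this to drop clean, unused cache over a byte range [offset, offset +
 * len), for example when acting on an advisory "don't need" hint:
 *
 *	pgoff_t first = offset >> PAGE_SHIFT;
 *	pgoff_t last = (offset + len - 1) >> PAGE_SHIFT;
 *
 *	invalidate_mapping_pages(inode->i_mapping, first, last);
 *
 * Dirty, mapped, locked or under-writeback folios are simply skipped.
 */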
541  
542  /*
543   * This is like mapping_evict_folio(), except it ignores the folio's
544   * refcount.  We do this because invalidate_inode_pages2() needs stronger
545   * invalidation guarantees, and cannot afford to leave folios behind because
546   * shrink_folio_list() has a temp ref on them, or because they're transiently
547   * sitting in the folio_add_lru() caches.
548   */
549  static int invalidate_complete_folio2(struct address_space *mapping,
550  					struct folio *folio)
551  {
552  	if (folio->mapping != mapping)
553  		return 0;
554  
555  	if (!filemap_release_folio(folio, GFP_KERNEL))
556  		return 0;
557  
558  	spin_lock(&mapping->host->i_lock);
559  	xa_lock_irq(&mapping->i_pages);
560  	if (folio_test_dirty(folio))
561  		goto failed;
562  
563  	BUG_ON(folio_has_private(folio));
564  	__filemap_remove_folio(folio, NULL);
565  	xa_unlock_irq(&mapping->i_pages);
566  	if (mapping_shrinkable(mapping))
567  		inode_add_lru(mapping->host);
568  	spin_unlock(&mapping->host->i_lock);
569  
570  	filemap_free_folio(mapping, folio);
571  	return 1;
572  failed:
573  	xa_unlock_irq(&mapping->i_pages);
574  	spin_unlock(&mapping->host->i_lock);
575  	return 0;
576  }
577  
578  static int folio_launder(struct address_space *mapping, struct folio *folio)
579  {
580  	if (!folio_test_dirty(folio))
581  		return 0;
582  	if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL)
583  		return 0;
584  	return mapping->a_ops->launder_folio(folio);
585  }
586  
587  /**
588   * invalidate_inode_pages2_range - remove range of pages from an address_space
589   * @mapping: the address_space
590   * @start: the page offset 'from' which to invalidate
591   * @end: the page offset 'to' which to invalidate (inclusive)
592   *
593   * Any pages which are found to be mapped into pagetables are unmapped prior to
594   * invalidation.
595   *
596   * Return: -EBUSY if any pages could not be invalidated.
597   */
598  int invalidate_inode_pages2_range(struct address_space *mapping,
599  				  pgoff_t start, pgoff_t end)
600  {
601  	pgoff_t indices[PAGEVEC_SIZE];
602  	struct folio_batch fbatch;
603  	pgoff_t index;
604  	int i;
605  	int ret = 0;
606  	int ret2 = 0;
607  	int did_range_unmap = 0;
608  	bool xa_has_values = false;
609  
610  	if (mapping_empty(mapping))
611  		return 0;
612  
613  	folio_batch_init(&fbatch);
614  	index = start;
615  	while (find_get_entries(mapping, &index, end, &fbatch, indices)) {
616  		for (i = 0; i < folio_batch_count(&fbatch); i++) {
617  			struct folio *folio = fbatch.folios[i];
618  
619  			/* We rely upon deletion not changing folio->index */
620  
621  			if (xa_is_value(folio)) {
622  				xa_has_values = true;
623  				if (dax_mapping(mapping) &&
624  				    !dax_invalidate_mapping_entry_sync(mapping, indices[i]))
625  					ret = -EBUSY;
626  				continue;
627  			}
628  
629  			if (!did_range_unmap && folio_mapped(folio)) {
630  				/*
631  				 * If folio is mapped, before taking its lock,
632  				 * zap the rest of the file in one hit.
633  				 */
634  				unmap_mapping_pages(mapping, indices[i],
635  						(1 + end - indices[i]), false);
636  				did_range_unmap = 1;
637  			}
638  
639  			folio_lock(folio);
640  			if (unlikely(folio->mapping != mapping)) {
641  				folio_unlock(folio);
642  				continue;
643  			}
644  			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
645  			folio_wait_writeback(folio);
646  
647  			if (folio_mapped(folio))
648  				unmap_mapping_folio(folio);
649  			BUG_ON(folio_mapped(folio));
650  
651  			ret2 = folio_launder(mapping, folio);
652  			if (ret2 == 0) {
653  				if (!invalidate_complete_folio2(mapping, folio))
654  					ret2 = -EBUSY;
655  			}
656  			if (ret2 < 0)
657  				ret = ret2;
658  			folio_unlock(folio);
659  		}
660  
661  		if (xa_has_values)
662  			clear_shadow_entries(mapping, &fbatch, indices);
663  
664  		folio_batch_remove_exceptionals(&fbatch);
665  		folio_batch_release(&fbatch);
666  		cond_resched();
667  	}
668  	/*
669  	 * For DAX we invalidate page tables after invalidating page cache.  We
670  	 * could invalidate page tables while invalidating each entry, but
671  	 * that would be expensive.  And doing the range unmapping up front
672  	 * doesn't work either, as we have no cheap way to tell whether a page
673  	 * cache entry got remapped later.
674  	 */
675  	if (dax_mapping(mapping)) {
676  		unmap_mapping_pages(mapping, start, end - start + 1, false);
677  	}
678  	return ret;
679  }
680  EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
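
/*
 * Illustrative sketch (editorial addition, not part of this file): the
 * classic user is direct I/O, which must kick cached pages covering the
 * range it writes out of the cache so later buffered reads do not see stale
 * data:
 *
 *	ret = invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
 *					    (pos + len - 1) >> PAGE_SHIFT);
 *	if (ret)
 *		... fall back to buffered I/O or fail the write ...
 *
 * (Only a sketch of the pattern; real direct-write paths also invalidate
 *  before issuing the I/O and handle -EBUSY in filesystem-specific ways.)
 */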
681  
682  /**
683   * invalidate_inode_pages2 - remove all pages from an address_space
684   * @mapping: the address_space
685   *
686   * Any pages which are found to be mapped into pagetables are unmapped prior to
687   * invalidation.
688   *
689   * Return: -EBUSY if any pages could not be invalidated.
690   */
691  int invalidate_inode_pages2(struct address_space *mapping)
692  {
693  	return invalidate_inode_pages2_range(mapping, 0, -1);
694  }
695  EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
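
/*
 * Illustrative sketch (editorial addition, not part of this file): network
 * filesystems use this when they detect the file changed on the server and
 * every cached page must be re-fetched:
 *
 *	if (invalidate_inode_pages2(inode->i_mapping))
 *		... some folios were busy; the caller decides how to react ...
 */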
696  
697  /**
698   * truncate_pagecache - unmap and remove pagecache that has been truncated
699   * @inode: inode
700   * @newsize: new file size
701   *
702   * inode's new i_size must already be written before truncate_pagecache
703   * is called.
704   *
705   * This function should typically be called before the filesystem
706   * releases resources associated with the freed range (eg. deallocates
707   * blocks). This way, pagecache will always stay logically coherent
708   * with on-disk format, and the filesystem would not have to deal with
709   * situations such as writepage being called for a page that has already
710   * had its underlying blocks deallocated.
711   */
712  void truncate_pagecache(struct inode *inode, loff_t newsize)
713  {
714  	struct address_space *mapping = inode->i_mapping;
715  	loff_t holebegin = round_up(newsize, PAGE_SIZE);
716  
717  	/*
718  	 * unmap_mapping_range is called twice, first simply for
719  	 * efficiency so that truncate_inode_pages does fewer
720  	 * single-page unmaps.  However after this first call, and
721  	 * before truncate_inode_pages finishes, it is possible for
722  	 * private pages to be COWed, which remain after
723  	 * truncate_inode_pages finishes, hence the second
724  	 * unmap_mapping_range call must be made for correctness.
725  	 */
726  	unmap_mapping_range(mapping, holebegin, 0, 1);
727  	truncate_inode_pages(mapping, newsize);
728  	unmap_mapping_range(mapping, holebegin, 0, 1);
729  }
730  EXPORT_SYMBOL(truncate_pagecache);
731  
732  /**
733   * truncate_setsize - update inode and pagecache for a new file size
734   * @inode: inode
735   * @newsize: new file size
736   *
737   * truncate_setsize updates i_size and performs pagecache truncation (if
738   * necessary) to @newsize.  It will typically be called from the filesystem's
739   * setattr function when ATTR_SIZE is passed in.
740   *
741   * Must be called with a lock serializing truncates and writes (generally
742   * i_rwsem but e.g. xfs uses a different lock) and before all filesystem
743   * specific block truncation has been performed.
744   */
745  void truncate_setsize(struct inode *inode, loff_t newsize)
746  {
747  	loff_t oldsize = inode->i_size;
748  
749  	i_size_write(inode, newsize);
750  	if (newsize > oldsize)
751  		pagecache_isize_extended(inode, oldsize, newsize);
752  	truncate_pagecache(inode, newsize);
753  }
754  EXPORT_SYMBOL(truncate_setsize);
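
/*
 * Illustrative sketch (editorial addition, not part of this file): a minimal
 * ->setattr() built around this helper might look as follows
 * (myfs_setattr is a hypothetical name; exact helper signatures depend on
 * the kernel version):
 *
 *	static int myfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 *				struct iattr *attr)
 *	{
 *		struct inode *inode = d_inode(dentry);
 *		int error = setattr_prepare(idmap, dentry, attr);
 *
 *		if (error)
 *			return error;
 *		if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != inode->i_size) {
 *			truncate_setsize(inode, attr->ia_size);
 *			... filesystem-specific block truncation goes here ...
 *		}
 *		setattr_copy(idmap, inode, attr);
 *		mark_inode_dirty(inode);
 *		return 0;
 *	}
 */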
755  
756  /**
757   * pagecache_isize_extended - update pagecache after extension of i_size
758   * @inode:	inode for which i_size was extended
759   * @from:	original inode size
760   * @to:		new inode size
761   *
762   * Handle extension of the inode size, caused either by an extending
763   * truncate or by a write starting after the current i_size.  We mark the
764   * page straddling the current i_size RO so that page_mkwrite() is called
765   * on the first write access to the page.  The filesystem can then update
766   * its per-block information before userspace writes to the page via mmap
767   * after the i_size has been changed.
768   *
769   * The function must be called after i_size is updated so that a page fault
770   * coming after we unlock the folio will already see the new i_size.
771   * The function must be called while we still hold i_rwsem - this not only
772   * makes sure i_size is stable, but also prevents userspace from observing
773   * the new i_size before we are prepared to store mmap writes at the new size.
774   */
775  void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
776  {
777  	int bsize = i_blocksize(inode);
778  	loff_t rounded_from;
779  	struct folio *folio;
780  
781  	WARN_ON(to > inode->i_size);
782  
783  	if (from >= to || bsize >= PAGE_SIZE)
784  		return;
785  	/* Page straddling @from will not have any hole block created? */
786  	rounded_from = round_up(from, bsize);
787  	if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
788  		return;
789  
790  	folio = filemap_lock_folio(inode->i_mapping, from / PAGE_SIZE);
791  	/* Folio not cached? Nothing to do */
792  	if (IS_ERR(folio))
793  		return;
794  	/*
795  	 * See folio_clear_dirty_for_io() for details why folio_mark_dirty()
796  	 * is needed.
797  	 */
798  	if (folio_mkclean(folio))
799  		folio_mark_dirty(folio);
800  	folio_unlock(folio);
801  	folio_put(folio);
802  }
803  EXPORT_SYMBOL(pagecache_isize_extended);
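
/*
 * Illustrative sketch (editorial addition, not part of this file): a
 * filesystem that extends a file without writing the new tail does, in this
 * order:
 *
 *	loff_t oldsize = inode->i_size;
 *
 *	i_size_write(inode, newsize);
 *	pagecache_isize_extended(inode, oldsize, newsize);
 *
 * which mirrors the extending half of truncate_setsize() above.
 */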
804  
805  /**
806   * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
807   * @inode: inode
808   * @lstart: offset of beginning of hole
809   * @lend: offset of last byte of hole
810   *
811   * This function should typically be called before the filesystem
812   * releases resources associated with the freed range (eg. deallocates
813   * blocks). This way, pagecache will always stay logically coherent
814   * with on-disk format, and the filesystem would not have to deal with
815   * situations such as writepage being called for a page that has already
816   * had its underlying blocks deallocated.
817   */
818  void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
819  {
820  	struct address_space *mapping = inode->i_mapping;
821  	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
822  	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
823  	/*
824  	 * This rounding is currently just for example: unmap_mapping_range
825  	 * expands its hole outwards, whereas we want it to contract the hole
826  	 * inwards.  However, existing callers of truncate_pagecache_range are
827  	 * doing their own page rounding first.  Note that unmap_mapping_range
828  	 * allows holelen 0 for all, and we allow lend -1 for end of file.
829  	 */
830  
831  	/*
832  	 * Unlike in truncate_pagecache, unmap_mapping_range is called only
833  	 * once (before truncating pagecache), and without "even_cows" flag:
834  	 * hole-punching should not remove private COWed pages from the hole.
835  	 */
836  	if ((u64)unmap_end > (u64)unmap_start)
837  		unmap_mapping_range(mapping, unmap_start,
838  				    1 + unmap_end - unmap_start, 0);
839  	truncate_inode_pages_range(mapping, lstart, lend);
840  }
841  EXPORT_SYMBOL(truncate_pagecache_range);
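
/*
 * Illustrative sketch (editorial addition, not part of this file): a
 * hole-punching fallocate() implementation typically calls this before
 * deallocating the underlying blocks, with an inclusive end offset:
 *
 *	filemap_invalidate_lock(inode->i_mapping);
 *	truncate_pagecache_range(inode, offset, offset + len - 1);
 *	... deallocate the blocks covering [offset, offset + len) ...
 *	filemap_invalidate_unlock(inode->i_mapping);
 *
 * (Locking details differ between filesystems; this is only a sketch.)
 */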
842