/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	akpm@zip.com.au
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/buffer_head.h>	/* grr. try_to_release_page,
				   do_invalidatepage */


/**
 * do_invalidatepage - invalidate part or all of a page
 * @page: the page which is affected
 * @offset: the index of the truncation point
 *
 * do_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * do_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void do_invalidatepage(struct page *page, unsigned long offset)
{
	void (*invalidatepage)(struct page *, unsigned long);
	invalidatepage = page->mapping->a_ops->invalidatepage;
#ifdef CONFIG_BLOCK
	if (!invalidatepage)
		invalidatepage = block_invalidatepage;
#endif
	if (invalidatepage)
		(*invalidatepage)(page, offset);
}
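
/*
 * As a sketch of how the hook above gets wired up: a block-based filesystem
 * can either leave ->invalidatepage unset and rely on the
 * block_invalidatepage() fallback, or point it at a handler of its own.
 * The aops table below is purely illustrative (the name is hypothetical and
 * only ->invalidatepage is shown):
 *
 *	static const struct address_space_operations example_aops = {
 *		.invalidatepage	= block_invalidatepage,
 *	};
 *
 * With such a mapping, do_invalidatepage(page, offset) ends up calling
 * block_invalidatepage(page, offset), which discards the page's buffers
 * beyond @offset.
 */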

static inline void truncate_partial_page(struct page *page, unsigned partial)
{
	memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
	if (PagePrivate(page))
		do_invalidatepage(page, partial);
}
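
/*
 * A worked example of the "partial" argument, assuming PAGE_CACHE_SIZE is
 * 4096: truncating a file to 10000 bytes gives partial = 10000 & 4095 =
 * 1808, so the final kept page (index 2, covering byte offsets 8192..12287)
 * has bytes 1808..4095 within it zeroed (4096 - 1808 = 2288 bytes), and any
 * private buffers beyond that offset are invalidated.
 */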

/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes anonymous.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_nopage().
 *
 * We need to bail out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_inode_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static void
truncate_complete_page(struct address_space *mapping, struct page *page)
{
	if (page->mapping != mapping)
		return;

	if (PagePrivate(page))
		do_invalidatepage(page, 0);

	clear_page_dirty(page);
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	remove_from_page_cache(page);
	page_cache_release(page);	/* pagecache ref */
}

/*
 * This is for invalidate_inode_pages().  That function can be called at
 * any time, and is not supposed to throw away dirty pages.  But pages can
 * be marked dirty at any time too, so use remove_mapping which safely
 * discards clean, unused pages.
 *
 * Returns non-zero if the page was successfully invalidated.
 */
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
	int ret;

	if (page->mapping != mapping)
		return 0;

	if (PagePrivate(page) && !try_to_release_page(page, 0))
		return 0;

	ret = remove_mapping(mapping, page);

	return ret;
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by start
 * and end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate
 *
 * Truncate the page cache, removing the pages that are between the
 * specified offsets (and zeroing out the partial page if lstart is not
 * page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * When looking at page->index outside the page lock we need to be careful to
 * copy it into a local to avoid races (it could change at any time).
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 */
void truncate_inode_pages_range(struct address_space *mapping,
				loff_t lstart, loff_t lend)
{
	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	pgoff_t end;
	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
	struct pagevec pvec;
	pgoff_t next;
	int i;

	if (mapping->nrpages == 0)
		return;

	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
	end = (lend >> PAGE_CACHE_SHIFT);

	pagevec_init(&pvec, 0);
	next = start;
	while (next <= end &&
	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;

			if (page_index > end) {
				next = page_index;
				break;
			}

			if (page_index > next)
				next = page_index;
			next++;
			if (TestSetPageLocked(page))
				continue;
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (partial) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			wait_on_page_writeback(page);
			truncate_partial_page(page, partial);
			unlock_page(page);
			page_cache_release(page);
		}
	}

	next = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}
		if (pvec.pages[0]->index > end) {
			pagevec_release(&pvec);
			break;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end)
				break;
			lock_page(page);
			wait_on_page_writeback(page);
			if (page->index > next)
				next = page->index;
			next++;
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
	}
}
EXPORT_SYMBOL(truncate_inode_pages_range);
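
/*
 * A minimal usage sketch (hypothetical caller): to drop the page cache
 * covering pages 1..3 of a file, a filesystem could do something like
 *
 *	loff_t lstart = 1 << PAGE_CACHE_SHIFT;
 *	loff_t lend = ((loff_t)4 << PAGE_CACHE_SHIFT) - 1;
 *
 *	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
 *
 * Note that @lend must point at the last byte of a page (the BUG_ON above
 * enforces this), and that dirty data in the range is simply discarded,
 * not written back.
 */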

/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_mutex.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);
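
/*
 * A typical caller (sketch only): a filesystem's truncate path, having
 * updated i_size under i_mutex, removes the now out-of-range pagecache with
 *
 *	truncate_inode_pages(inode->i_mapping, inode->i_size);
 *
 * Everything from the new i_size onwards is dropped, and the partial page
 * straddling i_size (if any) has its tail zeroed.
 */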

/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * This function only removes the unlocked pages; if you want to
 * remove all the pages of one inode, you must call truncate_inode_pages.
 *
 * invalidate_mapping_pages() will not block on IO activity. It will not
 * invalidate pages which are dirty, locked, under writeback or mapped into
 * pagetables.
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
				pgoff_t start, pgoff_t end)
{
	struct pagevec pvec;
	pgoff_t next = start;
	unsigned long ret = 0;
	int i;

	pagevec_init(&pvec, 0);
	while (next <= end &&
			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t index;
			int lock_failed;

			lock_failed = TestSetPageLocked(page);

			/*
			 * We really shouldn't be looking at the ->index of an
			 * unlocked page.  But we're not allowed to lock these
			 * pages.  So we rely upon nobody altering the ->index
			 * of this (pinned-by-us) page.
			 */
			index = page->index;
			if (index > next)
				next = index;
			next++;
			if (lock_failed)
				continue;

			if (PageDirty(page) || PageWriteback(page))
				goto unlock;
			if (page_mapped(page))
				goto unlock;
			ret += invalidate_complete_page(mapping, page);
unlock:
			unlock_page(page);
			if (next > end)
				break;
		}
		pagevec_release(&pvec);
	}
	return ret;
}

unsigned long invalidate_inode_pages(struct address_space *mapping)
{
	return invalidate_mapping_pages(mapping, 0, ~0UL);
}
EXPORT_SYMBOL(invalidate_inode_pages);
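
/*
 * Usage sketch (hypothetical caller): a "drop the clean cache for this
 * file" operation could be implemented as
 *
 *	unsigned long nr = invalidate_mapping_pages(inode->i_mapping,
 *						    0, ~0UL);
 *
 * which is what invalidate_inode_pages() above does.  The return value is
 * the number of pages actually invalidated; dirty, locked, mapped and
 * writeback pages are silently skipped.
 */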

/*
 * This is like invalidate_complete_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_list() has a temp ref on them, or because they're transiently sitting
 * in the lru_cache_add() pagevecs.
 */
static int
invalidate_complete_page2(struct address_space *mapping, struct page *page)
{
	if (page->mapping != mapping)
		return 0;

	if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
		return 0;

	write_lock_irq(&mapping->tree_lock);
	if (PageDirty(page))
		goto failed;

	BUG_ON(PagePrivate(page));
	__remove_from_page_cache(page);
	write_unlock_irq(&mapping->tree_lock);
	ClearPageUptodate(page);
	page_cache_release(page);	/* pagecache ref */
	return 1;
failed:
	write_unlock_irq(&mapping->tree_lock);
	return 0;
}

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EIO if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end)
{
	struct pagevec pvec;
	pgoff_t next;
	int i;
	int ret = 0;
	int did_range_unmap = 0;
	int wrapped = 0;

	pagevec_init(&pvec, 0);
	next = start;
	while (next <= end && !ret && !wrapped &&
		pagevec_lookup(&pvec, mapping, next,
			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
		for (i = 0; !ret && i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index;
			int was_dirty;

			lock_page(page);
			if (page->mapping != mapping) {
				unlock_page(page);
				continue;
			}
			page_index = page->index;
			next = page_index + 1;
			if (next == 0)
				wrapped = 1;
			if (page_index > end) {
				unlock_page(page);
				break;
			}
			wait_on_page_writeback(page);
			while (page_mapped(page)) {
				if (!did_range_unmap) {
					/*
					 * Zap the rest of the file in one hit.
					 */
					unmap_mapping_range(mapping,
					   (loff_t)page_index<<PAGE_CACHE_SHIFT,
					   (loff_t)(end - page_index + 1)
							<< PAGE_CACHE_SHIFT,
					    0);
					did_range_unmap = 1;
				} else {
					/*
					 * Just zap this page
					 */
					unmap_mapping_range(mapping,
					  (loff_t)page_index<<PAGE_CACHE_SHIFT,
					  PAGE_CACHE_SIZE, 0);
				}
			}
			was_dirty = test_clear_page_dirty(page);
			if (!invalidate_complete_page2(mapping, page)) {
				if (was_dirty)
					set_page_dirty(page);
				ret = -EIO;
			}
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	WARN_ON_ONCE(ret);
	return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
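
/*
 * Usage sketch (hypothetical caller): direct-IO style code that has just
 * written to disk, bypassing the page cache, might force re-reads of the
 * affected file range with
 *
 *	pgoff_t first = pos >> PAGE_CACHE_SHIFT;
 *	pgoff_t last = (pos + len - 1) >> PAGE_CACHE_SHIFT;
 *	int err = invalidate_inode_pages2_range(inode->i_mapping,
 *						first, last);
 *
 * A return of -EIO means at least one page could not be invalidated, so
 * stale data may remain in the cache.
 */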

/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EIO if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
	return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
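
/*
 * Usage sketch (hypothetical caller): a network filesystem that notices the
 * file has changed on the server might drop the whole local cache so that
 * subsequent reads go back to the wire:
 *
 *	if (invalidate_inode_pages2(inode->i_mapping))
 *		printk(KERN_WARNING "stale pagecache could not be dropped\n");
 *
 * Unlike invalidate_inode_pages(), this variant also unmaps the pages from
 * user pagetables and reports failure instead of quietly skipping busy pages.
 */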
417