11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * mm/truncate.c - code for taking down pages from address_spaces 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2002, Linus Torvalds 51da177e4SLinus Torvalds * 6e1f8e874SFrancois Cami * 10Sep2002 Andrew Morton 71da177e4SLinus Torvalds * Initial version. 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds 101da177e4SLinus Torvalds #include <linux/kernel.h> 114af3c9ccSAlexey Dobriyan #include <linux/backing-dev.h> 125a0e3ad6STejun Heo #include <linux/gfp.h> 131da177e4SLinus Torvalds #include <linux/mm.h> 140fd0e6b0SNick Piggin #include <linux/swap.h> 15b95f1b31SPaul Gortmaker #include <linux/export.h> 161da177e4SLinus Torvalds #include <linux/pagemap.h> 1701f2705dSNate Diller #include <linux/highmem.h> 181da177e4SLinus Torvalds #include <linux/pagevec.h> 19e08748ceSAndrew Morton #include <linux/task_io_accounting_ops.h> 201da177e4SLinus Torvalds #include <linux/buffer_head.h> /* grr. try_to_release_page, 21aaa4059bSJan Kara do_invalidatepage */ 22c515e1fdSDan Magenheimer #include <linux/cleancache.h> 23ba470de4SRik van Riel #include "internal.h" 241da177e4SLinus Torvalds 251da177e4SLinus Torvalds 26cf9a2ae8SDavid Howells /** 2728bc44d7SFengguang Wu * do_invalidatepage - invalidate part or all of a page 28cf9a2ae8SDavid Howells * @page: the page which is affected 29cf9a2ae8SDavid Howells * @offset: the index of the truncation point 30cf9a2ae8SDavid Howells * 31cf9a2ae8SDavid Howells * do_invalidatepage() is called when all or part of the page has become 32cf9a2ae8SDavid Howells * invalidated by a truncate operation. 33cf9a2ae8SDavid Howells * 34cf9a2ae8SDavid Howells * do_invalidatepage() does not have to release all buffers, but it must 35cf9a2ae8SDavid Howells * ensure that no dirty buffer is left outside @offset and that no I/O 36cf9a2ae8SDavid Howells * is underway against any of the blocks which are outside the truncation 37cf9a2ae8SDavid Howells * point. Because the caller is about to free (and possibly reuse) those 38cf9a2ae8SDavid Howells * blocks on-disk. 39cf9a2ae8SDavid Howells */ 40cf9a2ae8SDavid Howells void do_invalidatepage(struct page *page, unsigned long offset) 41cf9a2ae8SDavid Howells { 42cf9a2ae8SDavid Howells void (*invalidatepage)(struct page *, unsigned long); 43cf9a2ae8SDavid Howells invalidatepage = page->mapping->a_ops->invalidatepage; 449361401eSDavid Howells #ifdef CONFIG_BLOCK 45cf9a2ae8SDavid Howells if (!invalidatepage) 46cf9a2ae8SDavid Howells invalidatepage = block_invalidatepage; 479361401eSDavid Howells #endif 48cf9a2ae8SDavid Howells if (invalidatepage) 49cf9a2ae8SDavid Howells (*invalidatepage)(page, offset); 50cf9a2ae8SDavid Howells } 51cf9a2ae8SDavid Howells 521da177e4SLinus Torvalds static inline void truncate_partial_page(struct page *page, unsigned partial) 531da177e4SLinus Torvalds { 54eebd2aa3SChristoph Lameter zero_user_segment(page, partial, PAGE_CACHE_SIZE); 55c515e1fdSDan Magenheimer cleancache_flush_page(page->mapping, page); 56266cf658SDavid Howells if (page_has_private(page)) 571da177e4SLinus Torvalds do_invalidatepage(page, partial); 581da177e4SLinus Torvalds } 591da177e4SLinus Torvalds 60ecdfc978SLinus Torvalds /* 61ecdfc978SLinus Torvalds * This cancels just the dirty bit on the kernel page itself, it 62ecdfc978SLinus Torvalds * does NOT actually remove dirty bits on any mmap's that may be 63ecdfc978SLinus Torvalds * around. It also leaves the page tagged dirty, so any sync 64ecdfc978SLinus Torvalds * activity will still find it on the dirty lists, and in particular, 65ecdfc978SLinus Torvalds * clear_page_dirty_for_io() will still look at the dirty bits in 66ecdfc978SLinus Torvalds * the VM. 67ecdfc978SLinus Torvalds * 68ecdfc978SLinus Torvalds * Doing this should *normally* only ever be done when a page 69ecdfc978SLinus Torvalds * is truncated, and is not actually mapped anywhere at all. However, 70ecdfc978SLinus Torvalds * fs/buffer.c does this when it notices that somebody has cleaned 71ecdfc978SLinus Torvalds * out all the buffers on a page without actually doing it through 72ecdfc978SLinus Torvalds * the VM. Can you say "ext3 is horribly ugly"? Tought you could. 73ecdfc978SLinus Torvalds */ 74fba2591bSLinus Torvalds void cancel_dirty_page(struct page *page, unsigned int account_size) 75fba2591bSLinus Torvalds { 768368e328SLinus Torvalds if (TestClearPageDirty(page)) { 778368e328SLinus Torvalds struct address_space *mapping = page->mapping; 788368e328SLinus Torvalds if (mapping && mapping_cap_account_dirty(mapping)) { 793e67c098SAndrew Morton dec_zone_page_state(page, NR_FILE_DIRTY); 80c9e51e41SPeter Zijlstra dec_bdi_stat(mapping->backing_dev_info, 81c9e51e41SPeter Zijlstra BDI_RECLAIMABLE); 828368e328SLinus Torvalds if (account_size) 83fba2591bSLinus Torvalds task_io_account_cancelled_write(account_size); 84fba2591bSLinus Torvalds } 853e67c098SAndrew Morton } 868368e328SLinus Torvalds } 878368e328SLinus Torvalds EXPORT_SYMBOL(cancel_dirty_page); 88fba2591bSLinus Torvalds 891da177e4SLinus Torvalds /* 901da177e4SLinus Torvalds * If truncate cannot remove the fs-private metadata from the page, the page 9162e1c553SShaohua Li * becomes orphaned. It will be left on the LRU and may even be mapped into 9254cb8821SNick Piggin * user pagetables if we're racing with filemap_fault(). 931da177e4SLinus Torvalds * 941da177e4SLinus Torvalds * We need to bale out if page->mapping is no longer equal to the original 951da177e4SLinus Torvalds * mapping. This happens a) when the VM reclaimed the page while we waited on 96fc0ecff6SAndrew Morton * its lock, b) when a concurrent invalidate_mapping_pages got there first and 971da177e4SLinus Torvalds * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 981da177e4SLinus Torvalds */ 99750b4987SNick Piggin static int 1001da177e4SLinus Torvalds truncate_complete_page(struct address_space *mapping, struct page *page) 1011da177e4SLinus Torvalds { 1021da177e4SLinus Torvalds if (page->mapping != mapping) 103750b4987SNick Piggin return -EIO; 1041da177e4SLinus Torvalds 105266cf658SDavid Howells if (page_has_private(page)) 1061da177e4SLinus Torvalds do_invalidatepage(page, 0); 1071da177e4SLinus Torvalds 108a2b34564SBjorn Steinbrink cancel_dirty_page(page, PAGE_CACHE_SIZE); 109a2b34564SBjorn Steinbrink 110ba470de4SRik van Riel clear_page_mlock(page); 1111da177e4SLinus Torvalds ClearPageMappedToDisk(page); 1125adc7b51SMinchan Kim delete_from_page_cache(page); 113750b4987SNick Piggin return 0; 1141da177e4SLinus Torvalds } 1151da177e4SLinus Torvalds 1161da177e4SLinus Torvalds /* 117fc0ecff6SAndrew Morton * This is for invalidate_mapping_pages(). That function can be called at 1181da177e4SLinus Torvalds * any time, and is not supposed to throw away dirty pages. But pages can 1190fd0e6b0SNick Piggin * be marked dirty at any time too, so use remove_mapping which safely 1200fd0e6b0SNick Piggin * discards clean, unused pages. 1211da177e4SLinus Torvalds * 1221da177e4SLinus Torvalds * Returns non-zero if the page was successfully invalidated. 1231da177e4SLinus Torvalds */ 1241da177e4SLinus Torvalds static int 1251da177e4SLinus Torvalds invalidate_complete_page(struct address_space *mapping, struct page *page) 1261da177e4SLinus Torvalds { 1270fd0e6b0SNick Piggin int ret; 1280fd0e6b0SNick Piggin 1291da177e4SLinus Torvalds if (page->mapping != mapping) 1301da177e4SLinus Torvalds return 0; 1311da177e4SLinus Torvalds 132266cf658SDavid Howells if (page_has_private(page) && !try_to_release_page(page, 0)) 1331da177e4SLinus Torvalds return 0; 1341da177e4SLinus Torvalds 135ba470de4SRik van Riel clear_page_mlock(page); 1360fd0e6b0SNick Piggin ret = remove_mapping(mapping, page); 1370fd0e6b0SNick Piggin 1380fd0e6b0SNick Piggin return ret; 1391da177e4SLinus Torvalds } 1401da177e4SLinus Torvalds 141750b4987SNick Piggin int truncate_inode_page(struct address_space *mapping, struct page *page) 142750b4987SNick Piggin { 143750b4987SNick Piggin if (page_mapped(page)) { 144750b4987SNick Piggin unmap_mapping_range(mapping, 145750b4987SNick Piggin (loff_t)page->index << PAGE_CACHE_SHIFT, 146750b4987SNick Piggin PAGE_CACHE_SIZE, 0); 147750b4987SNick Piggin } 148750b4987SNick Piggin return truncate_complete_page(mapping, page); 149750b4987SNick Piggin } 150750b4987SNick Piggin 15183f78668SWu Fengguang /* 15225718736SAndi Kleen * Used to get rid of pages on hardware memory corruption. 15325718736SAndi Kleen */ 15425718736SAndi Kleen int generic_error_remove_page(struct address_space *mapping, struct page *page) 15525718736SAndi Kleen { 15625718736SAndi Kleen if (!mapping) 15725718736SAndi Kleen return -EINVAL; 15825718736SAndi Kleen /* 15925718736SAndi Kleen * Only punch for normal data pages for now. 16025718736SAndi Kleen * Handling other types like directories would need more auditing. 16125718736SAndi Kleen */ 16225718736SAndi Kleen if (!S_ISREG(mapping->host->i_mode)) 16325718736SAndi Kleen return -EIO; 16425718736SAndi Kleen return truncate_inode_page(mapping, page); 16525718736SAndi Kleen } 16625718736SAndi Kleen EXPORT_SYMBOL(generic_error_remove_page); 16725718736SAndi Kleen 16825718736SAndi Kleen /* 16983f78668SWu Fengguang * Safely invalidate one page from its pagecache mapping. 17083f78668SWu Fengguang * It only drops clean, unused pages. The page must be locked. 17183f78668SWu Fengguang * 17283f78668SWu Fengguang * Returns 1 if the page is successfully invalidated, otherwise 0. 17383f78668SWu Fengguang */ 17483f78668SWu Fengguang int invalidate_inode_page(struct page *page) 17583f78668SWu Fengguang { 17683f78668SWu Fengguang struct address_space *mapping = page_mapping(page); 17783f78668SWu Fengguang if (!mapping) 17883f78668SWu Fengguang return 0; 17983f78668SWu Fengguang if (PageDirty(page) || PageWriteback(page)) 18083f78668SWu Fengguang return 0; 18183f78668SWu Fengguang if (page_mapped(page)) 18283f78668SWu Fengguang return 0; 18383f78668SWu Fengguang return invalidate_complete_page(mapping, page); 18483f78668SWu Fengguang } 18583f78668SWu Fengguang 1861da177e4SLinus Torvalds /** 187*73c1e204SLiu Bo * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets 1881da177e4SLinus Torvalds * @mapping: mapping to truncate 1891da177e4SLinus Torvalds * @lstart: offset from which to truncate 190d7339071SHans Reiser * @lend: offset to which to truncate 1911da177e4SLinus Torvalds * 192d7339071SHans Reiser * Truncate the page cache, removing the pages that are between 193d7339071SHans Reiser * specified offsets (and zeroing out partial page 194d7339071SHans Reiser * (if lstart is not page aligned)). 1951da177e4SLinus Torvalds * 1961da177e4SLinus Torvalds * Truncate takes two passes - the first pass is nonblocking. It will not 1971da177e4SLinus Torvalds * block on page locks and it will not block on writeback. The second pass 1981da177e4SLinus Torvalds * will wait. This is to prevent as much IO as possible in the affected region. 1991da177e4SLinus Torvalds * The first pass will remove most pages, so the search cost of the second pass 2001da177e4SLinus Torvalds * is low. 2011da177e4SLinus Torvalds * 2021da177e4SLinus Torvalds * We pass down the cache-hot hint to the page freeing code. Even if the 2031da177e4SLinus Torvalds * mapping is large, it is probably the case that the final pages are the most 2041da177e4SLinus Torvalds * recently touched, and freeing happens in ascending file offset order. 2051da177e4SLinus Torvalds */ 206d7339071SHans Reiser void truncate_inode_pages_range(struct address_space *mapping, 207d7339071SHans Reiser loff_t lstart, loff_t lend) 2081da177e4SLinus Torvalds { 2091da177e4SLinus Torvalds const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 2101da177e4SLinus Torvalds const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 2111da177e4SLinus Torvalds struct pagevec pvec; 212b85e0effSHugh Dickins pgoff_t index; 213b85e0effSHugh Dickins pgoff_t end; 2141da177e4SLinus Torvalds int i; 2151da177e4SLinus Torvalds 216c515e1fdSDan Magenheimer cleancache_flush_inode(mapping); 2171da177e4SLinus Torvalds if (mapping->nrpages == 0) 2181da177e4SLinus Torvalds return; 2191da177e4SLinus Torvalds 220d7339071SHans Reiser BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); 221d7339071SHans Reiser end = (lend >> PAGE_CACHE_SHIFT); 222d7339071SHans Reiser 2231da177e4SLinus Torvalds pagevec_init(&pvec, 0); 224b85e0effSHugh Dickins index = start; 225b85e0effSHugh Dickins while (index <= end && pagevec_lookup(&pvec, mapping, index, 226b85e0effSHugh Dickins min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 227e5598f8bSHugh Dickins mem_cgroup_uncharge_start(); 2281da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2291da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2301da177e4SLinus Torvalds 231b85e0effSHugh Dickins /* We rely upon deletion not changing page->index */ 232b85e0effSHugh Dickins index = page->index; 233b85e0effSHugh Dickins if (index > end) 234d7339071SHans Reiser break; 235d7339071SHans Reiser 236529ae9aaSNick Piggin if (!trylock_page(page)) 2371da177e4SLinus Torvalds continue; 238b85e0effSHugh Dickins WARN_ON(page->index != index); 2391da177e4SLinus Torvalds if (PageWriteback(page)) { 2401da177e4SLinus Torvalds unlock_page(page); 2411da177e4SLinus Torvalds continue; 2421da177e4SLinus Torvalds } 243750b4987SNick Piggin truncate_inode_page(mapping, page); 2441da177e4SLinus Torvalds unlock_page(page); 2451da177e4SLinus Torvalds } 2461da177e4SLinus Torvalds pagevec_release(&pvec); 247e5598f8bSHugh Dickins mem_cgroup_uncharge_end(); 2481da177e4SLinus Torvalds cond_resched(); 249b85e0effSHugh Dickins index++; 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds 2521da177e4SLinus Torvalds if (partial) { 2531da177e4SLinus Torvalds struct page *page = find_lock_page(mapping, start - 1); 2541da177e4SLinus Torvalds if (page) { 2551da177e4SLinus Torvalds wait_on_page_writeback(page); 2561da177e4SLinus Torvalds truncate_partial_page(page, partial); 2571da177e4SLinus Torvalds unlock_page(page); 2581da177e4SLinus Torvalds page_cache_release(page); 2591da177e4SLinus Torvalds } 2601da177e4SLinus Torvalds } 2611da177e4SLinus Torvalds 262b85e0effSHugh Dickins index = start; 2631da177e4SLinus Torvalds for ( ; ; ) { 2641da177e4SLinus Torvalds cond_resched(); 265b85e0effSHugh Dickins if (!pagevec_lookup(&pvec, mapping, index, 266b85e0effSHugh Dickins min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 267b85e0effSHugh Dickins if (index == start) 2681da177e4SLinus Torvalds break; 269b85e0effSHugh Dickins index = start; 2701da177e4SLinus Torvalds continue; 2711da177e4SLinus Torvalds } 272d0823576SHugh Dickins if (index == start && pvec.pages[0]->index > end) { 273d7339071SHans Reiser pagevec_release(&pvec); 274d7339071SHans Reiser break; 275d7339071SHans Reiser } 276569b846dSKAMEZAWA Hiroyuki mem_cgroup_uncharge_start(); 2771da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2781da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2791da177e4SLinus Torvalds 280b85e0effSHugh Dickins /* We rely upon deletion not changing page->index */ 281b85e0effSHugh Dickins index = page->index; 282b85e0effSHugh Dickins if (index > end) 283d7339071SHans Reiser break; 284b85e0effSHugh Dickins 2851da177e4SLinus Torvalds lock_page(page); 286b85e0effSHugh Dickins WARN_ON(page->index != index); 2871da177e4SLinus Torvalds wait_on_page_writeback(page); 288750b4987SNick Piggin truncate_inode_page(mapping, page); 2891da177e4SLinus Torvalds unlock_page(page); 2901da177e4SLinus Torvalds } 2911da177e4SLinus Torvalds pagevec_release(&pvec); 292569b846dSKAMEZAWA Hiroyuki mem_cgroup_uncharge_end(); 293b85e0effSHugh Dickins index++; 2941da177e4SLinus Torvalds } 295c515e1fdSDan Magenheimer cleancache_flush_inode(mapping); 2961da177e4SLinus Torvalds } 297d7339071SHans Reiser EXPORT_SYMBOL(truncate_inode_pages_range); 2981da177e4SLinus Torvalds 299d7339071SHans Reiser /** 300d7339071SHans Reiser * truncate_inode_pages - truncate *all* the pages from an offset 301d7339071SHans Reiser * @mapping: mapping to truncate 302d7339071SHans Reiser * @lstart: offset from which to truncate 303d7339071SHans Reiser * 3041b1dcc1bSJes Sorensen * Called under (and serialised by) inode->i_mutex. 30508142579SJan Kara * 30608142579SJan Kara * Note: When this function returns, there can be a page in the process of 30708142579SJan Kara * deletion (inside __delete_from_page_cache()) in the specified range. Thus 30808142579SJan Kara * mapping->nrpages can be non-zero when this function returns even after 30908142579SJan Kara * truncation of the whole mapping. 310d7339071SHans Reiser */ 311d7339071SHans Reiser void truncate_inode_pages(struct address_space *mapping, loff_t lstart) 312d7339071SHans Reiser { 313d7339071SHans Reiser truncate_inode_pages_range(mapping, lstart, (loff_t)-1); 314d7339071SHans Reiser } 3151da177e4SLinus Torvalds EXPORT_SYMBOL(truncate_inode_pages); 3161da177e4SLinus Torvalds 31728697355SMike Waychison /** 31828697355SMike Waychison * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode 31928697355SMike Waychison * @mapping: the address_space which holds the pages to invalidate 32028697355SMike Waychison * @start: the offset 'from' which to invalidate 32128697355SMike Waychison * @end: the offset 'to' which to invalidate (inclusive) 32228697355SMike Waychison * 32328697355SMike Waychison * This function only removes the unlocked pages, if you want to 32428697355SMike Waychison * remove all the pages of one inode, you must call truncate_inode_pages. 32528697355SMike Waychison * 32628697355SMike Waychison * invalidate_mapping_pages() will not block on IO activity. It will not 32728697355SMike Waychison * invalidate pages which are dirty, locked, under writeback or mapped into 32828697355SMike Waychison * pagetables. 32928697355SMike Waychison */ 33028697355SMike Waychison unsigned long invalidate_mapping_pages(struct address_space *mapping, 33128697355SMike Waychison pgoff_t start, pgoff_t end) 3321da177e4SLinus Torvalds { 3331da177e4SLinus Torvalds struct pagevec pvec; 334b85e0effSHugh Dickins pgoff_t index = start; 33531560180SMinchan Kim unsigned long ret; 33631560180SMinchan Kim unsigned long count = 0; 3371da177e4SLinus Torvalds int i; 3381da177e4SLinus Torvalds 33931475dd6SHugh Dickins /* 34031475dd6SHugh Dickins * Note: this function may get called on a shmem/tmpfs mapping: 34131475dd6SHugh Dickins * pagevec_lookup() might then return 0 prematurely (because it 34231475dd6SHugh Dickins * got a gangful of swap entries); but it's hardly worth worrying 34331475dd6SHugh Dickins * about - it can rarely have anything to free from such a mapping 34431475dd6SHugh Dickins * (most pages are dirty), and already skips over any difficulties. 34531475dd6SHugh Dickins */ 34631475dd6SHugh Dickins 3471da177e4SLinus Torvalds pagevec_init(&pvec, 0); 348b85e0effSHugh Dickins while (index <= end && pagevec_lookup(&pvec, mapping, index, 349b85e0effSHugh Dickins min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 350569b846dSKAMEZAWA Hiroyuki mem_cgroup_uncharge_start(); 3511da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 3521da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 3531da177e4SLinus Torvalds 354b85e0effSHugh Dickins /* We rely upon deletion not changing page->index */ 355e0f23603SNeilBrown index = page->index; 356b85e0effSHugh Dickins if (index > end) 357b85e0effSHugh Dickins break; 358e0f23603SNeilBrown 359b85e0effSHugh Dickins if (!trylock_page(page)) 360b85e0effSHugh Dickins continue; 361b85e0effSHugh Dickins WARN_ON(page->index != index); 36231560180SMinchan Kim ret = invalidate_inode_page(page); 3631da177e4SLinus Torvalds unlock_page(page); 36431560180SMinchan Kim /* 36531560180SMinchan Kim * Invalidation is a hint that the page is no longer 36631560180SMinchan Kim * of interest and try to speed up its reclaim. 36731560180SMinchan Kim */ 36831560180SMinchan Kim if (!ret) 36931560180SMinchan Kim deactivate_page(page); 37031560180SMinchan Kim count += ret; 3711da177e4SLinus Torvalds } 3721da177e4SLinus Torvalds pagevec_release(&pvec); 373569b846dSKAMEZAWA Hiroyuki mem_cgroup_uncharge_end(); 374fc9a07e7SAndrew Morton cond_resched(); 375b85e0effSHugh Dickins index++; 3761da177e4SLinus Torvalds } 37731560180SMinchan Kim return count; 3781da177e4SLinus Torvalds } 37954bc4855SAnton Altaparmakov EXPORT_SYMBOL(invalidate_mapping_pages); 3801da177e4SLinus Torvalds 381bd4c8ce4SAndrew Morton /* 382bd4c8ce4SAndrew Morton * This is like invalidate_complete_page(), except it ignores the page's 383bd4c8ce4SAndrew Morton * refcount. We do this because invalidate_inode_pages2() needs stronger 384bd4c8ce4SAndrew Morton * invalidation guarantees, and cannot afford to leave pages behind because 3852706a1b8SAnderson Briglia * shrink_page_list() has a temp ref on them, or because they're transiently 3862706a1b8SAnderson Briglia * sitting in the lru_cache_add() pagevecs. 387bd4c8ce4SAndrew Morton */ 388bd4c8ce4SAndrew Morton static int 389bd4c8ce4SAndrew Morton invalidate_complete_page2(struct address_space *mapping, struct page *page) 390bd4c8ce4SAndrew Morton { 391bd4c8ce4SAndrew Morton if (page->mapping != mapping) 392bd4c8ce4SAndrew Morton return 0; 393bd4c8ce4SAndrew Morton 394266cf658SDavid Howells if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) 395bd4c8ce4SAndrew Morton return 0; 396bd4c8ce4SAndrew Morton 39719fd6231SNick Piggin spin_lock_irq(&mapping->tree_lock); 398bd4c8ce4SAndrew Morton if (PageDirty(page)) 399bd4c8ce4SAndrew Morton goto failed; 400bd4c8ce4SAndrew Morton 401ba470de4SRik van Riel clear_page_mlock(page); 402266cf658SDavid Howells BUG_ON(page_has_private(page)); 403e64a782fSMinchan Kim __delete_from_page_cache(page); 40419fd6231SNick Piggin spin_unlock_irq(&mapping->tree_lock); 405e767e056SDaisuke Nishimura mem_cgroup_uncharge_cache_page(page); 4066072d13cSLinus Torvalds 4076072d13cSLinus Torvalds if (mapping->a_ops->freepage) 4086072d13cSLinus Torvalds mapping->a_ops->freepage(page); 4096072d13cSLinus Torvalds 410bd4c8ce4SAndrew Morton page_cache_release(page); /* pagecache ref */ 411bd4c8ce4SAndrew Morton return 1; 412bd4c8ce4SAndrew Morton failed: 41319fd6231SNick Piggin spin_unlock_irq(&mapping->tree_lock); 414bd4c8ce4SAndrew Morton return 0; 415bd4c8ce4SAndrew Morton } 416bd4c8ce4SAndrew Morton 417e3db7691STrond Myklebust static int do_launder_page(struct address_space *mapping, struct page *page) 418e3db7691STrond Myklebust { 419e3db7691STrond Myklebust if (!PageDirty(page)) 420e3db7691STrond Myklebust return 0; 421e3db7691STrond Myklebust if (page->mapping != mapping || mapping->a_ops->launder_page == NULL) 422e3db7691STrond Myklebust return 0; 423e3db7691STrond Myklebust return mapping->a_ops->launder_page(page); 424e3db7691STrond Myklebust } 425e3db7691STrond Myklebust 4261da177e4SLinus Torvalds /** 4271da177e4SLinus Torvalds * invalidate_inode_pages2_range - remove range of pages from an address_space 42867be2dd1SMartin Waitz * @mapping: the address_space 4291da177e4SLinus Torvalds * @start: the page offset 'from' which to invalidate 4301da177e4SLinus Torvalds * @end: the page offset 'to' which to invalidate (inclusive) 4311da177e4SLinus Torvalds * 4321da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 4331da177e4SLinus Torvalds * invalidation. 4341da177e4SLinus Torvalds * 4356ccfa806SHisashi Hifumi * Returns -EBUSY if any pages could not be invalidated. 4361da177e4SLinus Torvalds */ 4371da177e4SLinus Torvalds int invalidate_inode_pages2_range(struct address_space *mapping, 4381da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 4391da177e4SLinus Torvalds { 4401da177e4SLinus Torvalds struct pagevec pvec; 441b85e0effSHugh Dickins pgoff_t index; 4421da177e4SLinus Torvalds int i; 4431da177e4SLinus Torvalds int ret = 0; 4440dd1334fSHisashi Hifumi int ret2 = 0; 4451da177e4SLinus Torvalds int did_range_unmap = 0; 4461da177e4SLinus Torvalds 447c515e1fdSDan Magenheimer cleancache_flush_inode(mapping); 4481da177e4SLinus Torvalds pagevec_init(&pvec, 0); 449b85e0effSHugh Dickins index = start; 450b85e0effSHugh Dickins while (index <= end && pagevec_lookup(&pvec, mapping, index, 451b85e0effSHugh Dickins min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 452569b846dSKAMEZAWA Hiroyuki mem_cgroup_uncharge_start(); 4537b965e08STrond Myklebust for (i = 0; i < pagevec_count(&pvec); i++) { 4541da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 455b85e0effSHugh Dickins 456b85e0effSHugh Dickins /* We rely upon deletion not changing page->index */ 457b85e0effSHugh Dickins index = page->index; 458b85e0effSHugh Dickins if (index > end) 459b85e0effSHugh Dickins break; 4601da177e4SLinus Torvalds 4611da177e4SLinus Torvalds lock_page(page); 462b85e0effSHugh Dickins WARN_ON(page->index != index); 4631da177e4SLinus Torvalds if (page->mapping != mapping) { 4641da177e4SLinus Torvalds unlock_page(page); 4651da177e4SLinus Torvalds continue; 4661da177e4SLinus Torvalds } 4671da177e4SLinus Torvalds wait_on_page_writeback(page); 468d00806b1SNick Piggin if (page_mapped(page)) { 4691da177e4SLinus Torvalds if (!did_range_unmap) { 4701da177e4SLinus Torvalds /* 4711da177e4SLinus Torvalds * Zap the rest of the file in one hit. 4721da177e4SLinus Torvalds */ 4731da177e4SLinus Torvalds unmap_mapping_range(mapping, 474b85e0effSHugh Dickins (loff_t)index << PAGE_CACHE_SHIFT, 475b85e0effSHugh Dickins (loff_t)(1 + end - index) 4761da177e4SLinus Torvalds << PAGE_CACHE_SHIFT, 4771da177e4SLinus Torvalds 0); 4781da177e4SLinus Torvalds did_range_unmap = 1; 4791da177e4SLinus Torvalds } else { 4801da177e4SLinus Torvalds /* 4811da177e4SLinus Torvalds * Just zap this page 4821da177e4SLinus Torvalds */ 4831da177e4SLinus Torvalds unmap_mapping_range(mapping, 484b85e0effSHugh Dickins (loff_t)index << PAGE_CACHE_SHIFT, 4851da177e4SLinus Torvalds PAGE_CACHE_SIZE, 0); 4861da177e4SLinus Torvalds } 4871da177e4SLinus Torvalds } 488d00806b1SNick Piggin BUG_ON(page_mapped(page)); 4890dd1334fSHisashi Hifumi ret2 = do_launder_page(mapping, page); 4900dd1334fSHisashi Hifumi if (ret2 == 0) { 4910dd1334fSHisashi Hifumi if (!invalidate_complete_page2(mapping, page)) 4926ccfa806SHisashi Hifumi ret2 = -EBUSY; 4930dd1334fSHisashi Hifumi } 4940dd1334fSHisashi Hifumi if (ret2 < 0) 4950dd1334fSHisashi Hifumi ret = ret2; 4961da177e4SLinus Torvalds unlock_page(page); 4971da177e4SLinus Torvalds } 4981da177e4SLinus Torvalds pagevec_release(&pvec); 499569b846dSKAMEZAWA Hiroyuki mem_cgroup_uncharge_end(); 5001da177e4SLinus Torvalds cond_resched(); 501b85e0effSHugh Dickins index++; 5021da177e4SLinus Torvalds } 503c515e1fdSDan Magenheimer cleancache_flush_inode(mapping); 5041da177e4SLinus Torvalds return ret; 5051da177e4SLinus Torvalds } 5061da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); 5071da177e4SLinus Torvalds 5081da177e4SLinus Torvalds /** 5091da177e4SLinus Torvalds * invalidate_inode_pages2 - remove all pages from an address_space 51067be2dd1SMartin Waitz * @mapping: the address_space 5111da177e4SLinus Torvalds * 5121da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 5131da177e4SLinus Torvalds * invalidation. 5141da177e4SLinus Torvalds * 515e9de25ddSPeng Tao * Returns -EBUSY if any pages could not be invalidated. 5161da177e4SLinus Torvalds */ 5171da177e4SLinus Torvalds int invalidate_inode_pages2(struct address_space *mapping) 5181da177e4SLinus Torvalds { 5191da177e4SLinus Torvalds return invalidate_inode_pages2_range(mapping, 0, -1); 5201da177e4SLinus Torvalds } 5211da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 52225d9e2d1Snpiggin@suse.de 52325d9e2d1Snpiggin@suse.de /** 52425d9e2d1Snpiggin@suse.de * truncate_pagecache - unmap and remove pagecache that has been truncated 52525d9e2d1Snpiggin@suse.de * @inode: inode 5268a549beaSHugh Dickins * @oldsize: old file size 5278a549beaSHugh Dickins * @newsize: new file size 52825d9e2d1Snpiggin@suse.de * 52925d9e2d1Snpiggin@suse.de * inode's new i_size must already be written before truncate_pagecache 53025d9e2d1Snpiggin@suse.de * is called. 53125d9e2d1Snpiggin@suse.de * 53225d9e2d1Snpiggin@suse.de * This function should typically be called before the filesystem 53325d9e2d1Snpiggin@suse.de * releases resources associated with the freed range (eg. deallocates 53425d9e2d1Snpiggin@suse.de * blocks). This way, pagecache will always stay logically coherent 53525d9e2d1Snpiggin@suse.de * with on-disk format, and the filesystem would not have to deal with 53625d9e2d1Snpiggin@suse.de * situations such as writepage being called for a page that has already 53725d9e2d1Snpiggin@suse.de * had its underlying blocks deallocated. 53825d9e2d1Snpiggin@suse.de */ 5398a549beaSHugh Dickins void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize) 54025d9e2d1Snpiggin@suse.de { 54125d9e2d1Snpiggin@suse.de struct address_space *mapping = inode->i_mapping; 5428a549beaSHugh Dickins loff_t holebegin = round_up(newsize, PAGE_SIZE); 54325d9e2d1Snpiggin@suse.de 54425d9e2d1Snpiggin@suse.de /* 54525d9e2d1Snpiggin@suse.de * unmap_mapping_range is called twice, first simply for 54625d9e2d1Snpiggin@suse.de * efficiency so that truncate_inode_pages does fewer 54725d9e2d1Snpiggin@suse.de * single-page unmaps. However after this first call, and 54825d9e2d1Snpiggin@suse.de * before truncate_inode_pages finishes, it is possible for 54925d9e2d1Snpiggin@suse.de * private pages to be COWed, which remain after 55025d9e2d1Snpiggin@suse.de * truncate_inode_pages finishes, hence the second 55125d9e2d1Snpiggin@suse.de * unmap_mapping_range call must be made for correctness. 55225d9e2d1Snpiggin@suse.de */ 5538a549beaSHugh Dickins unmap_mapping_range(mapping, holebegin, 0, 1); 5548a549beaSHugh Dickins truncate_inode_pages(mapping, newsize); 5558a549beaSHugh Dickins unmap_mapping_range(mapping, holebegin, 0, 1); 55625d9e2d1Snpiggin@suse.de } 55725d9e2d1Snpiggin@suse.de EXPORT_SYMBOL(truncate_pagecache); 55825d9e2d1Snpiggin@suse.de 55925d9e2d1Snpiggin@suse.de /** 5602c27c65eSChristoph Hellwig * truncate_setsize - update inode and pagecache for a new file size 5612c27c65eSChristoph Hellwig * @inode: inode 5622c27c65eSChristoph Hellwig * @newsize: new file size 5632c27c65eSChristoph Hellwig * 564382e27daSJan Kara * truncate_setsize updates i_size and performs pagecache truncation (if 565382e27daSJan Kara * necessary) to @newsize. It will be typically be called from the filesystem's 566382e27daSJan Kara * setattr function when ATTR_SIZE is passed in. 5672c27c65eSChristoph Hellwig * 568382e27daSJan Kara * Must be called with inode_mutex held and before all filesystem specific 569382e27daSJan Kara * block truncation has been performed. 5702c27c65eSChristoph Hellwig */ 5712c27c65eSChristoph Hellwig void truncate_setsize(struct inode *inode, loff_t newsize) 5722c27c65eSChristoph Hellwig { 5732c27c65eSChristoph Hellwig loff_t oldsize; 5742c27c65eSChristoph Hellwig 5752c27c65eSChristoph Hellwig oldsize = inode->i_size; 5762c27c65eSChristoph Hellwig i_size_write(inode, newsize); 5772c27c65eSChristoph Hellwig 5782c27c65eSChristoph Hellwig truncate_pagecache(inode, oldsize, newsize); 5792c27c65eSChristoph Hellwig } 5802c27c65eSChristoph Hellwig EXPORT_SYMBOL(truncate_setsize); 5812c27c65eSChristoph Hellwig 5822c27c65eSChristoph Hellwig /** 58325d9e2d1Snpiggin@suse.de * vmtruncate - unmap mappings "freed" by truncate() syscall 58425d9e2d1Snpiggin@suse.de * @inode: inode of the file used 5858a549beaSHugh Dickins * @newsize: file offset to start truncating 58625d9e2d1Snpiggin@suse.de * 5872c27c65eSChristoph Hellwig * This function is deprecated and truncate_setsize or truncate_pagecache 5882c27c65eSChristoph Hellwig * should be used instead, together with filesystem specific block truncation. 58925d9e2d1Snpiggin@suse.de */ 5908a549beaSHugh Dickins int vmtruncate(struct inode *inode, loff_t newsize) 59125d9e2d1Snpiggin@suse.de { 59225d9e2d1Snpiggin@suse.de int error; 59325d9e2d1Snpiggin@suse.de 5948a549beaSHugh Dickins error = inode_newsize_ok(inode, newsize); 59525d9e2d1Snpiggin@suse.de if (error) 59625d9e2d1Snpiggin@suse.de return error; 5977bb46a67Snpiggin@suse.de 5988a549beaSHugh Dickins truncate_setsize(inode, newsize); 59925d9e2d1Snpiggin@suse.de if (inode->i_op->truncate) 60025d9e2d1Snpiggin@suse.de inode->i_op->truncate(inode); 6012c27c65eSChristoph Hellwig return 0; 60225d9e2d1Snpiggin@suse.de } 60325d9e2d1Snpiggin@suse.de EXPORT_SYMBOL(vmtruncate); 6045b8ba101SHugh Dickins 6058a549beaSHugh Dickins int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend) 6065b8ba101SHugh Dickins { 6075b8ba101SHugh Dickins struct address_space *mapping = inode->i_mapping; 6088a549beaSHugh Dickins loff_t holebegin = round_up(lstart, PAGE_SIZE); 6098a549beaSHugh Dickins loff_t holelen = 1 + lend - holebegin; 6105b8ba101SHugh Dickins 6115b8ba101SHugh Dickins /* 6125b8ba101SHugh Dickins * If the underlying filesystem is not going to provide 6135b8ba101SHugh Dickins * a way to truncate a range of blocks (punch a hole) - 6145b8ba101SHugh Dickins * we should return failure right now. 6155b8ba101SHugh Dickins */ 6165b8ba101SHugh Dickins if (!inode->i_op->truncate_range) 6175b8ba101SHugh Dickins return -ENOSYS; 6185b8ba101SHugh Dickins 6195b8ba101SHugh Dickins mutex_lock(&inode->i_mutex); 620bd5fe6c5SChristoph Hellwig inode_dio_wait(inode); 6218a549beaSHugh Dickins unmap_mapping_range(mapping, holebegin, holelen, 1); 6228a549beaSHugh Dickins inode->i_op->truncate_range(inode, lstart, lend); 62394c1e62dSHugh Dickins /* unmap again to remove racily COWed private pages */ 6248a549beaSHugh Dickins unmap_mapping_range(mapping, holebegin, holelen, 1); 6255b8ba101SHugh Dickins mutex_unlock(&inode->i_mutex); 6265b8ba101SHugh Dickins 6275b8ba101SHugh Dickins return 0; 6285b8ba101SHugh Dickins } 629