11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * mm/truncate.c - code for taking down pages from address_spaces 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2002, Linus Torvalds 51da177e4SLinus Torvalds * 6e1f8e874SFrancois Cami * 10Sep2002 Andrew Morton 71da177e4SLinus Torvalds * Initial version. 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds 101da177e4SLinus Torvalds #include <linux/kernel.h> 114af3c9ccSAlexey Dobriyan #include <linux/backing-dev.h> 121da177e4SLinus Torvalds #include <linux/mm.h> 130fd0e6b0SNick Piggin #include <linux/swap.h> 141da177e4SLinus Torvalds #include <linux/module.h> 151da177e4SLinus Torvalds #include <linux/pagemap.h> 1601f2705dSNate Diller #include <linux/highmem.h> 171da177e4SLinus Torvalds #include <linux/pagevec.h> 18e08748ceSAndrew Morton #include <linux/task_io_accounting_ops.h> 191da177e4SLinus Torvalds #include <linux/buffer_head.h> /* grr. try_to_release_page, 20aaa4059bSJan Kara do_invalidatepage */ 21ba470de4SRik van Riel #include "internal.h" 221da177e4SLinus Torvalds 231da177e4SLinus Torvalds 24cf9a2ae8SDavid Howells /** 2528bc44d7SFengguang Wu * do_invalidatepage - invalidate part or all of a page 26cf9a2ae8SDavid Howells * @page: the page which is affected 27cf9a2ae8SDavid Howells * @offset: the index of the truncation point 28cf9a2ae8SDavid Howells * 29cf9a2ae8SDavid Howells * do_invalidatepage() is called when all or part of the page has become 30cf9a2ae8SDavid Howells * invalidated by a truncate operation. 31cf9a2ae8SDavid Howells * 32cf9a2ae8SDavid Howells * do_invalidatepage() does not have to release all buffers, but it must 33cf9a2ae8SDavid Howells * ensure that no dirty buffer is left outside @offset and that no I/O 34cf9a2ae8SDavid Howells * is underway against any of the blocks which are outside the truncation 35cf9a2ae8SDavid Howells * point. Because the caller is about to free (and possibly reuse) those 36cf9a2ae8SDavid Howells * blocks on-disk. 37cf9a2ae8SDavid Howells */ 38cf9a2ae8SDavid Howells void do_invalidatepage(struct page *page, unsigned long offset) 39cf9a2ae8SDavid Howells { 40cf9a2ae8SDavid Howells void (*invalidatepage)(struct page *, unsigned long); 41cf9a2ae8SDavid Howells invalidatepage = page->mapping->a_ops->invalidatepage; 429361401eSDavid Howells #ifdef CONFIG_BLOCK 43cf9a2ae8SDavid Howells if (!invalidatepage) 44cf9a2ae8SDavid Howells invalidatepage = block_invalidatepage; 459361401eSDavid Howells #endif 46cf9a2ae8SDavid Howells if (invalidatepage) 47cf9a2ae8SDavid Howells (*invalidatepage)(page, offset); 48cf9a2ae8SDavid Howells } 49cf9a2ae8SDavid Howells 501da177e4SLinus Torvalds static inline void truncate_partial_page(struct page *page, unsigned partial) 511da177e4SLinus Torvalds { 52eebd2aa3SChristoph Lameter zero_user_segment(page, partial, PAGE_CACHE_SIZE); 53266cf658SDavid Howells if (page_has_private(page)) 541da177e4SLinus Torvalds do_invalidatepage(page, partial); 551da177e4SLinus Torvalds } 561da177e4SLinus Torvalds 57ecdfc978SLinus Torvalds /* 58ecdfc978SLinus Torvalds * This cancels just the dirty bit on the kernel page itself, it 59ecdfc978SLinus Torvalds * does NOT actually remove dirty bits on any mmap's that may be 60ecdfc978SLinus Torvalds * around. It also leaves the page tagged dirty, so any sync 61ecdfc978SLinus Torvalds * activity will still find it on the dirty lists, and in particular, 62ecdfc978SLinus Torvalds * clear_page_dirty_for_io() will still look at the dirty bits in 63ecdfc978SLinus Torvalds * the VM. 64ecdfc978SLinus Torvalds * 65ecdfc978SLinus Torvalds * Doing this should *normally* only ever be done when a page 66ecdfc978SLinus Torvalds * is truncated, and is not actually mapped anywhere at all. However, 67ecdfc978SLinus Torvalds * fs/buffer.c does this when it notices that somebody has cleaned 68ecdfc978SLinus Torvalds * out all the buffers on a page without actually doing it through 69ecdfc978SLinus Torvalds * the VM. Can you say "ext3 is horribly ugly"? Tought you could. 70ecdfc978SLinus Torvalds */ 71fba2591bSLinus Torvalds void cancel_dirty_page(struct page *page, unsigned int account_size) 72fba2591bSLinus Torvalds { 738368e328SLinus Torvalds if (TestClearPageDirty(page)) { 748368e328SLinus Torvalds struct address_space *mapping = page->mapping; 758368e328SLinus Torvalds if (mapping && mapping_cap_account_dirty(mapping)) { 763e67c098SAndrew Morton dec_zone_page_state(page, NR_FILE_DIRTY); 77c9e51e41SPeter Zijlstra dec_bdi_stat(mapping->backing_dev_info, 78c9e51e41SPeter Zijlstra BDI_RECLAIMABLE); 798368e328SLinus Torvalds if (account_size) 80fba2591bSLinus Torvalds task_io_account_cancelled_write(account_size); 81fba2591bSLinus Torvalds } 823e67c098SAndrew Morton } 838368e328SLinus Torvalds } 848368e328SLinus Torvalds EXPORT_SYMBOL(cancel_dirty_page); 85fba2591bSLinus Torvalds 861da177e4SLinus Torvalds /* 871da177e4SLinus Torvalds * If truncate cannot remove the fs-private metadata from the page, the page 8862e1c553SShaohua Li * becomes orphaned. It will be left on the LRU and may even be mapped into 8954cb8821SNick Piggin * user pagetables if we're racing with filemap_fault(). 901da177e4SLinus Torvalds * 911da177e4SLinus Torvalds * We need to bale out if page->mapping is no longer equal to the original 921da177e4SLinus Torvalds * mapping. This happens a) when the VM reclaimed the page while we waited on 93fc0ecff6SAndrew Morton * its lock, b) when a concurrent invalidate_mapping_pages got there first and 941da177e4SLinus Torvalds * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 951da177e4SLinus Torvalds */ 96750b4987SNick Piggin static int 971da177e4SLinus Torvalds truncate_complete_page(struct address_space *mapping, struct page *page) 981da177e4SLinus Torvalds { 991da177e4SLinus Torvalds if (page->mapping != mapping) 100750b4987SNick Piggin return -EIO; 1011da177e4SLinus Torvalds 102266cf658SDavid Howells if (page_has_private(page)) 1031da177e4SLinus Torvalds do_invalidatepage(page, 0); 1041da177e4SLinus Torvalds 105a2b34564SBjorn Steinbrink cancel_dirty_page(page, PAGE_CACHE_SIZE); 106a2b34564SBjorn Steinbrink 107ba470de4SRik van Riel clear_page_mlock(page); 108787d2214SNick Piggin remove_from_page_cache(page); 1091da177e4SLinus Torvalds ClearPageMappedToDisk(page); 1101da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 111750b4987SNick Piggin return 0; 1121da177e4SLinus Torvalds } 1131da177e4SLinus Torvalds 1141da177e4SLinus Torvalds /* 115fc0ecff6SAndrew Morton * This is for invalidate_mapping_pages(). That function can be called at 1161da177e4SLinus Torvalds * any time, and is not supposed to throw away dirty pages. But pages can 1170fd0e6b0SNick Piggin * be marked dirty at any time too, so use remove_mapping which safely 1180fd0e6b0SNick Piggin * discards clean, unused pages. 1191da177e4SLinus Torvalds * 1201da177e4SLinus Torvalds * Returns non-zero if the page was successfully invalidated. 1211da177e4SLinus Torvalds */ 1221da177e4SLinus Torvalds static int 1231da177e4SLinus Torvalds invalidate_complete_page(struct address_space *mapping, struct page *page) 1241da177e4SLinus Torvalds { 1250fd0e6b0SNick Piggin int ret; 1260fd0e6b0SNick Piggin 1271da177e4SLinus Torvalds if (page->mapping != mapping) 1281da177e4SLinus Torvalds return 0; 1291da177e4SLinus Torvalds 130266cf658SDavid Howells if (page_has_private(page) && !try_to_release_page(page, 0)) 1311da177e4SLinus Torvalds return 0; 1321da177e4SLinus Torvalds 133ba470de4SRik van Riel clear_page_mlock(page); 1340fd0e6b0SNick Piggin ret = remove_mapping(mapping, page); 1350fd0e6b0SNick Piggin 1360fd0e6b0SNick Piggin return ret; 1371da177e4SLinus Torvalds } 1381da177e4SLinus Torvalds 139750b4987SNick Piggin int truncate_inode_page(struct address_space *mapping, struct page *page) 140750b4987SNick Piggin { 141750b4987SNick Piggin if (page_mapped(page)) { 142750b4987SNick Piggin unmap_mapping_range(mapping, 143750b4987SNick Piggin (loff_t)page->index << PAGE_CACHE_SHIFT, 144750b4987SNick Piggin PAGE_CACHE_SIZE, 0); 145750b4987SNick Piggin } 146750b4987SNick Piggin return truncate_complete_page(mapping, page); 147750b4987SNick Piggin } 148750b4987SNick Piggin 14983f78668SWu Fengguang /* 15025718736SAndi Kleen * Used to get rid of pages on hardware memory corruption. 15125718736SAndi Kleen */ 15225718736SAndi Kleen int generic_error_remove_page(struct address_space *mapping, struct page *page) 15325718736SAndi Kleen { 15425718736SAndi Kleen if (!mapping) 15525718736SAndi Kleen return -EINVAL; 15625718736SAndi Kleen /* 15725718736SAndi Kleen * Only punch for normal data pages for now. 15825718736SAndi Kleen * Handling other types like directories would need more auditing. 15925718736SAndi Kleen */ 16025718736SAndi Kleen if (!S_ISREG(mapping->host->i_mode)) 16125718736SAndi Kleen return -EIO; 16225718736SAndi Kleen return truncate_inode_page(mapping, page); 16325718736SAndi Kleen } 16425718736SAndi Kleen EXPORT_SYMBOL(generic_error_remove_page); 16525718736SAndi Kleen 16625718736SAndi Kleen /* 16783f78668SWu Fengguang * Safely invalidate one page from its pagecache mapping. 16883f78668SWu Fengguang * It only drops clean, unused pages. The page must be locked. 16983f78668SWu Fengguang * 17083f78668SWu Fengguang * Returns 1 if the page is successfully invalidated, otherwise 0. 17183f78668SWu Fengguang */ 17283f78668SWu Fengguang int invalidate_inode_page(struct page *page) 17383f78668SWu Fengguang { 17483f78668SWu Fengguang struct address_space *mapping = page_mapping(page); 17583f78668SWu Fengguang if (!mapping) 17683f78668SWu Fengguang return 0; 17783f78668SWu Fengguang if (PageDirty(page) || PageWriteback(page)) 17883f78668SWu Fengguang return 0; 17983f78668SWu Fengguang if (page_mapped(page)) 18083f78668SWu Fengguang return 0; 18183f78668SWu Fengguang return invalidate_complete_page(mapping, page); 18283f78668SWu Fengguang } 18383f78668SWu Fengguang 1841da177e4SLinus Torvalds /** 1850643245fSRandy Dunlap * truncate_inode_pages - truncate range of pages specified by start & end byte offsets 1861da177e4SLinus Torvalds * @mapping: mapping to truncate 1871da177e4SLinus Torvalds * @lstart: offset from which to truncate 188d7339071SHans Reiser * @lend: offset to which to truncate 1891da177e4SLinus Torvalds * 190d7339071SHans Reiser * Truncate the page cache, removing the pages that are between 191d7339071SHans Reiser * specified offsets (and zeroing out partial page 192d7339071SHans Reiser * (if lstart is not page aligned)). 1931da177e4SLinus Torvalds * 1941da177e4SLinus Torvalds * Truncate takes two passes - the first pass is nonblocking. It will not 1951da177e4SLinus Torvalds * block on page locks and it will not block on writeback. The second pass 1961da177e4SLinus Torvalds * will wait. This is to prevent as much IO as possible in the affected region. 1971da177e4SLinus Torvalds * The first pass will remove most pages, so the search cost of the second pass 1981da177e4SLinus Torvalds * is low. 1991da177e4SLinus Torvalds * 2001da177e4SLinus Torvalds * When looking at page->index outside the page lock we need to be careful to 2011da177e4SLinus Torvalds * copy it into a local to avoid races (it could change at any time). 2021da177e4SLinus Torvalds * 2031da177e4SLinus Torvalds * We pass down the cache-hot hint to the page freeing code. Even if the 2041da177e4SLinus Torvalds * mapping is large, it is probably the case that the final pages are the most 2051da177e4SLinus Torvalds * recently touched, and freeing happens in ascending file offset order. 2061da177e4SLinus Torvalds */ 207d7339071SHans Reiser void truncate_inode_pages_range(struct address_space *mapping, 208d7339071SHans Reiser loff_t lstart, loff_t lend) 2091da177e4SLinus Torvalds { 2101da177e4SLinus Torvalds const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 211d7339071SHans Reiser pgoff_t end; 2121da177e4SLinus Torvalds const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 2131da177e4SLinus Torvalds struct pagevec pvec; 2141da177e4SLinus Torvalds pgoff_t next; 2151da177e4SLinus Torvalds int i; 2161da177e4SLinus Torvalds 2171da177e4SLinus Torvalds if (mapping->nrpages == 0) 2181da177e4SLinus Torvalds return; 2191da177e4SLinus Torvalds 220d7339071SHans Reiser BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); 221d7339071SHans Reiser end = (lend >> PAGE_CACHE_SHIFT); 222d7339071SHans Reiser 2231da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2241da177e4SLinus Torvalds next = start; 225d7339071SHans Reiser while (next <= end && 226d7339071SHans Reiser pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 2271da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2281da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2291da177e4SLinus Torvalds pgoff_t page_index = page->index; 2301da177e4SLinus Torvalds 231d7339071SHans Reiser if (page_index > end) { 232d7339071SHans Reiser next = page_index; 233d7339071SHans Reiser break; 234d7339071SHans Reiser } 235d7339071SHans Reiser 2361da177e4SLinus Torvalds if (page_index > next) 2371da177e4SLinus Torvalds next = page_index; 2381da177e4SLinus Torvalds next++; 239529ae9aaSNick Piggin if (!trylock_page(page)) 2401da177e4SLinus Torvalds continue; 2411da177e4SLinus Torvalds if (PageWriteback(page)) { 2421da177e4SLinus Torvalds unlock_page(page); 2431da177e4SLinus Torvalds continue; 2441da177e4SLinus Torvalds } 245750b4987SNick Piggin truncate_inode_page(mapping, page); 2461da177e4SLinus Torvalds unlock_page(page); 2471da177e4SLinus Torvalds } 2481da177e4SLinus Torvalds pagevec_release(&pvec); 2491da177e4SLinus Torvalds cond_resched(); 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds 2521da177e4SLinus Torvalds if (partial) { 2531da177e4SLinus Torvalds struct page *page = find_lock_page(mapping, start - 1); 2541da177e4SLinus Torvalds if (page) { 2551da177e4SLinus Torvalds wait_on_page_writeback(page); 2561da177e4SLinus Torvalds truncate_partial_page(page, partial); 2571da177e4SLinus Torvalds unlock_page(page); 2581da177e4SLinus Torvalds page_cache_release(page); 2591da177e4SLinus Torvalds } 2601da177e4SLinus Torvalds } 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds next = start; 2631da177e4SLinus Torvalds for ( ; ; ) { 2641da177e4SLinus Torvalds cond_resched(); 2651da177e4SLinus Torvalds if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 2661da177e4SLinus Torvalds if (next == start) 2671da177e4SLinus Torvalds break; 2681da177e4SLinus Torvalds next = start; 2691da177e4SLinus Torvalds continue; 2701da177e4SLinus Torvalds } 271d7339071SHans Reiser if (pvec.pages[0]->index > end) { 272d7339071SHans Reiser pagevec_release(&pvec); 273d7339071SHans Reiser break; 274d7339071SHans Reiser } 2751da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2761da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2771da177e4SLinus Torvalds 278d7339071SHans Reiser if (page->index > end) 279d7339071SHans Reiser break; 2801da177e4SLinus Torvalds lock_page(page); 2811da177e4SLinus Torvalds wait_on_page_writeback(page); 282750b4987SNick Piggin truncate_inode_page(mapping, page); 2831da177e4SLinus Torvalds if (page->index > next) 2841da177e4SLinus Torvalds next = page->index; 2851da177e4SLinus Torvalds next++; 2861da177e4SLinus Torvalds unlock_page(page); 2871da177e4SLinus Torvalds } 2881da177e4SLinus Torvalds pagevec_release(&pvec); 2891da177e4SLinus Torvalds } 2901da177e4SLinus Torvalds } 291d7339071SHans Reiser EXPORT_SYMBOL(truncate_inode_pages_range); 2921da177e4SLinus Torvalds 293d7339071SHans Reiser /** 294d7339071SHans Reiser * truncate_inode_pages - truncate *all* the pages from an offset 295d7339071SHans Reiser * @mapping: mapping to truncate 296d7339071SHans Reiser * @lstart: offset from which to truncate 297d7339071SHans Reiser * 2981b1dcc1bSJes Sorensen * Called under (and serialised by) inode->i_mutex. 299d7339071SHans Reiser */ 300d7339071SHans Reiser void truncate_inode_pages(struct address_space *mapping, loff_t lstart) 301d7339071SHans Reiser { 302d7339071SHans Reiser truncate_inode_pages_range(mapping, lstart, (loff_t)-1); 303d7339071SHans Reiser } 3041da177e4SLinus Torvalds EXPORT_SYMBOL(truncate_inode_pages); 3051da177e4SLinus Torvalds 30628697355SMike Waychison /** 30728697355SMike Waychison * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode 30828697355SMike Waychison * @mapping: the address_space which holds the pages to invalidate 30928697355SMike Waychison * @start: the offset 'from' which to invalidate 31028697355SMike Waychison * @end: the offset 'to' which to invalidate (inclusive) 31128697355SMike Waychison * 31228697355SMike Waychison * This function only removes the unlocked pages, if you want to 31328697355SMike Waychison * remove all the pages of one inode, you must call truncate_inode_pages. 31428697355SMike Waychison * 31528697355SMike Waychison * invalidate_mapping_pages() will not block on IO activity. It will not 31628697355SMike Waychison * invalidate pages which are dirty, locked, under writeback or mapped into 31728697355SMike Waychison * pagetables. 31828697355SMike Waychison */ 31928697355SMike Waychison unsigned long invalidate_mapping_pages(struct address_space *mapping, 32028697355SMike Waychison pgoff_t start, pgoff_t end) 3211da177e4SLinus Torvalds { 3221da177e4SLinus Torvalds struct pagevec pvec; 3231da177e4SLinus Torvalds pgoff_t next = start; 3241da177e4SLinus Torvalds unsigned long ret = 0; 3251da177e4SLinus Torvalds int i; 3261da177e4SLinus Torvalds 3271da177e4SLinus Torvalds pagevec_init(&pvec, 0); 3281da177e4SLinus Torvalds while (next <= end && 3291da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 3301da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 3311da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 332e0f23603SNeilBrown pgoff_t index; 333e0f23603SNeilBrown int lock_failed; 3341da177e4SLinus Torvalds 335529ae9aaSNick Piggin lock_failed = !trylock_page(page); 336e0f23603SNeilBrown 337e0f23603SNeilBrown /* 338e0f23603SNeilBrown * We really shouldn't be looking at the ->index of an 339e0f23603SNeilBrown * unlocked page. But we're not allowed to lock these 340e0f23603SNeilBrown * pages. So we rely upon nobody altering the ->index 341e0f23603SNeilBrown * of this (pinned-by-us) page. 342e0f23603SNeilBrown */ 343e0f23603SNeilBrown index = page->index; 344e0f23603SNeilBrown if (index > next) 345e0f23603SNeilBrown next = index; 3461da177e4SLinus Torvalds next++; 347e0f23603SNeilBrown if (lock_failed) 3481da177e4SLinus Torvalds continue; 349e0f23603SNeilBrown 35083f78668SWu Fengguang ret += invalidate_inode_page(page); 35183f78668SWu Fengguang 3521da177e4SLinus Torvalds unlock_page(page); 3531da177e4SLinus Torvalds if (next > end) 3541da177e4SLinus Torvalds break; 3551da177e4SLinus Torvalds } 3561da177e4SLinus Torvalds pagevec_release(&pvec); 357fc9a07e7SAndrew Morton cond_resched(); 3581da177e4SLinus Torvalds } 3591da177e4SLinus Torvalds return ret; 3601da177e4SLinus Torvalds } 36154bc4855SAnton Altaparmakov EXPORT_SYMBOL(invalidate_mapping_pages); 3621da177e4SLinus Torvalds 363bd4c8ce4SAndrew Morton /* 364bd4c8ce4SAndrew Morton * This is like invalidate_complete_page(), except it ignores the page's 365bd4c8ce4SAndrew Morton * refcount. We do this because invalidate_inode_pages2() needs stronger 366bd4c8ce4SAndrew Morton * invalidation guarantees, and cannot afford to leave pages behind because 3672706a1b8SAnderson Briglia * shrink_page_list() has a temp ref on them, or because they're transiently 3682706a1b8SAnderson Briglia * sitting in the lru_cache_add() pagevecs. 369bd4c8ce4SAndrew Morton */ 370bd4c8ce4SAndrew Morton static int 371bd4c8ce4SAndrew Morton invalidate_complete_page2(struct address_space *mapping, struct page *page) 372bd4c8ce4SAndrew Morton { 373bd4c8ce4SAndrew Morton if (page->mapping != mapping) 374bd4c8ce4SAndrew Morton return 0; 375bd4c8ce4SAndrew Morton 376266cf658SDavid Howells if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) 377bd4c8ce4SAndrew Morton return 0; 378bd4c8ce4SAndrew Morton 37919fd6231SNick Piggin spin_lock_irq(&mapping->tree_lock); 380bd4c8ce4SAndrew Morton if (PageDirty(page)) 381bd4c8ce4SAndrew Morton goto failed; 382bd4c8ce4SAndrew Morton 383ba470de4SRik van Riel clear_page_mlock(page); 384266cf658SDavid Howells BUG_ON(page_has_private(page)); 385bd4c8ce4SAndrew Morton __remove_from_page_cache(page); 38619fd6231SNick Piggin spin_unlock_irq(&mapping->tree_lock); 387e767e056SDaisuke Nishimura mem_cgroup_uncharge_cache_page(page); 388bd4c8ce4SAndrew Morton page_cache_release(page); /* pagecache ref */ 389bd4c8ce4SAndrew Morton return 1; 390bd4c8ce4SAndrew Morton failed: 39119fd6231SNick Piggin spin_unlock_irq(&mapping->tree_lock); 392bd4c8ce4SAndrew Morton return 0; 393bd4c8ce4SAndrew Morton } 394bd4c8ce4SAndrew Morton 395e3db7691STrond Myklebust static int do_launder_page(struct address_space *mapping, struct page *page) 396e3db7691STrond Myklebust { 397e3db7691STrond Myklebust if (!PageDirty(page)) 398e3db7691STrond Myklebust return 0; 399e3db7691STrond Myklebust if (page->mapping != mapping || mapping->a_ops->launder_page == NULL) 400e3db7691STrond Myklebust return 0; 401e3db7691STrond Myklebust return mapping->a_ops->launder_page(page); 402e3db7691STrond Myklebust } 403e3db7691STrond Myklebust 4041da177e4SLinus Torvalds /** 4051da177e4SLinus Torvalds * invalidate_inode_pages2_range - remove range of pages from an address_space 40667be2dd1SMartin Waitz * @mapping: the address_space 4071da177e4SLinus Torvalds * @start: the page offset 'from' which to invalidate 4081da177e4SLinus Torvalds * @end: the page offset 'to' which to invalidate (inclusive) 4091da177e4SLinus Torvalds * 4101da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 4111da177e4SLinus Torvalds * invalidation. 4121da177e4SLinus Torvalds * 4136ccfa806SHisashi Hifumi * Returns -EBUSY if any pages could not be invalidated. 4141da177e4SLinus Torvalds */ 4151da177e4SLinus Torvalds int invalidate_inode_pages2_range(struct address_space *mapping, 4161da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 4171da177e4SLinus Torvalds { 4181da177e4SLinus Torvalds struct pagevec pvec; 4191da177e4SLinus Torvalds pgoff_t next; 4201da177e4SLinus Torvalds int i; 4211da177e4SLinus Torvalds int ret = 0; 4220dd1334fSHisashi Hifumi int ret2 = 0; 4231da177e4SLinus Torvalds int did_range_unmap = 0; 4241da177e4SLinus Torvalds int wrapped = 0; 4251da177e4SLinus Torvalds 4261da177e4SLinus Torvalds pagevec_init(&pvec, 0); 4271da177e4SLinus Torvalds next = start; 4287b965e08STrond Myklebust while (next <= end && !wrapped && 4291da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, 4301da177e4SLinus Torvalds min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 4317b965e08STrond Myklebust for (i = 0; i < pagevec_count(&pvec); i++) { 4321da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 4331da177e4SLinus Torvalds pgoff_t page_index; 4341da177e4SLinus Torvalds 4351da177e4SLinus Torvalds lock_page(page); 4361da177e4SLinus Torvalds if (page->mapping != mapping) { 4371da177e4SLinus Torvalds unlock_page(page); 4381da177e4SLinus Torvalds continue; 4391da177e4SLinus Torvalds } 4401da177e4SLinus Torvalds page_index = page->index; 4411da177e4SLinus Torvalds next = page_index + 1; 4421da177e4SLinus Torvalds if (next == 0) 4431da177e4SLinus Torvalds wrapped = 1; 4441da177e4SLinus Torvalds if (page_index > end) { 4451da177e4SLinus Torvalds unlock_page(page); 4461da177e4SLinus Torvalds break; 4471da177e4SLinus Torvalds } 4481da177e4SLinus Torvalds wait_on_page_writeback(page); 449d00806b1SNick Piggin if (page_mapped(page)) { 4501da177e4SLinus Torvalds if (!did_range_unmap) { 4511da177e4SLinus Torvalds /* 4521da177e4SLinus Torvalds * Zap the rest of the file in one hit. 4531da177e4SLinus Torvalds */ 4541da177e4SLinus Torvalds unmap_mapping_range(mapping, 455479ef592SOleg Drokin (loff_t)page_index<<PAGE_CACHE_SHIFT, 456479ef592SOleg Drokin (loff_t)(end - page_index + 1) 4571da177e4SLinus Torvalds << PAGE_CACHE_SHIFT, 4581da177e4SLinus Torvalds 0); 4591da177e4SLinus Torvalds did_range_unmap = 1; 4601da177e4SLinus Torvalds } else { 4611da177e4SLinus Torvalds /* 4621da177e4SLinus Torvalds * Just zap this page 4631da177e4SLinus Torvalds */ 4641da177e4SLinus Torvalds unmap_mapping_range(mapping, 465479ef592SOleg Drokin (loff_t)page_index<<PAGE_CACHE_SHIFT, 4661da177e4SLinus Torvalds PAGE_CACHE_SIZE, 0); 4671da177e4SLinus Torvalds } 4681da177e4SLinus Torvalds } 469d00806b1SNick Piggin BUG_ON(page_mapped(page)); 4700dd1334fSHisashi Hifumi ret2 = do_launder_page(mapping, page); 4710dd1334fSHisashi Hifumi if (ret2 == 0) { 4720dd1334fSHisashi Hifumi if (!invalidate_complete_page2(mapping, page)) 4736ccfa806SHisashi Hifumi ret2 = -EBUSY; 4740dd1334fSHisashi Hifumi } 4750dd1334fSHisashi Hifumi if (ret2 < 0) 4760dd1334fSHisashi Hifumi ret = ret2; 4771da177e4SLinus Torvalds unlock_page(page); 4781da177e4SLinus Torvalds } 4791da177e4SLinus Torvalds pagevec_release(&pvec); 4801da177e4SLinus Torvalds cond_resched(); 4811da177e4SLinus Torvalds } 4821da177e4SLinus Torvalds return ret; 4831da177e4SLinus Torvalds } 4841da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); 4851da177e4SLinus Torvalds 4861da177e4SLinus Torvalds /** 4871da177e4SLinus Torvalds * invalidate_inode_pages2 - remove all pages from an address_space 48867be2dd1SMartin Waitz * @mapping: the address_space 4891da177e4SLinus Torvalds * 4901da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 4911da177e4SLinus Torvalds * invalidation. 4921da177e4SLinus Torvalds * 493*e9de25ddSPeng Tao * Returns -EBUSY if any pages could not be invalidated. 4941da177e4SLinus Torvalds */ 4951da177e4SLinus Torvalds int invalidate_inode_pages2(struct address_space *mapping) 4961da177e4SLinus Torvalds { 4971da177e4SLinus Torvalds return invalidate_inode_pages2_range(mapping, 0, -1); 4981da177e4SLinus Torvalds } 4991da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 50025d9e2d1Snpiggin@suse.de 50125d9e2d1Snpiggin@suse.de /** 50225d9e2d1Snpiggin@suse.de * truncate_pagecache - unmap and remove pagecache that has been truncated 50325d9e2d1Snpiggin@suse.de * @inode: inode 50425d9e2d1Snpiggin@suse.de * @old: old file offset 50525d9e2d1Snpiggin@suse.de * @new: new file offset 50625d9e2d1Snpiggin@suse.de * 50725d9e2d1Snpiggin@suse.de * inode's new i_size must already be written before truncate_pagecache 50825d9e2d1Snpiggin@suse.de * is called. 50925d9e2d1Snpiggin@suse.de * 51025d9e2d1Snpiggin@suse.de * This function should typically be called before the filesystem 51125d9e2d1Snpiggin@suse.de * releases resources associated with the freed range (eg. deallocates 51225d9e2d1Snpiggin@suse.de * blocks). This way, pagecache will always stay logically coherent 51325d9e2d1Snpiggin@suse.de * with on-disk format, and the filesystem would not have to deal with 51425d9e2d1Snpiggin@suse.de * situations such as writepage being called for a page that has already 51525d9e2d1Snpiggin@suse.de * had its underlying blocks deallocated. 51625d9e2d1Snpiggin@suse.de */ 51725d9e2d1Snpiggin@suse.de void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) 51825d9e2d1Snpiggin@suse.de { 51925d9e2d1Snpiggin@suse.de if (new < old) { 52025d9e2d1Snpiggin@suse.de struct address_space *mapping = inode->i_mapping; 52125d9e2d1Snpiggin@suse.de 52225d9e2d1Snpiggin@suse.de /* 52325d9e2d1Snpiggin@suse.de * unmap_mapping_range is called twice, first simply for 52425d9e2d1Snpiggin@suse.de * efficiency so that truncate_inode_pages does fewer 52525d9e2d1Snpiggin@suse.de * single-page unmaps. However after this first call, and 52625d9e2d1Snpiggin@suse.de * before truncate_inode_pages finishes, it is possible for 52725d9e2d1Snpiggin@suse.de * private pages to be COWed, which remain after 52825d9e2d1Snpiggin@suse.de * truncate_inode_pages finishes, hence the second 52925d9e2d1Snpiggin@suse.de * unmap_mapping_range call must be made for correctness. 53025d9e2d1Snpiggin@suse.de */ 53125d9e2d1Snpiggin@suse.de unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); 53225d9e2d1Snpiggin@suse.de truncate_inode_pages(mapping, new); 53325d9e2d1Snpiggin@suse.de unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); 53425d9e2d1Snpiggin@suse.de } 53525d9e2d1Snpiggin@suse.de } 53625d9e2d1Snpiggin@suse.de EXPORT_SYMBOL(truncate_pagecache); 53725d9e2d1Snpiggin@suse.de 53825d9e2d1Snpiggin@suse.de /** 53925d9e2d1Snpiggin@suse.de * vmtruncate - unmap mappings "freed" by truncate() syscall 54025d9e2d1Snpiggin@suse.de * @inode: inode of the file used 54125d9e2d1Snpiggin@suse.de * @offset: file offset to start truncating 54225d9e2d1Snpiggin@suse.de * 54325d9e2d1Snpiggin@suse.de * NOTE! We have to be ready to update the memory sharing 54425d9e2d1Snpiggin@suse.de * between the file and the memory map for a potential last 54525d9e2d1Snpiggin@suse.de * incomplete page. Ugly, but necessary. 54625d9e2d1Snpiggin@suse.de */ 54725d9e2d1Snpiggin@suse.de int vmtruncate(struct inode *inode, loff_t offset) 54825d9e2d1Snpiggin@suse.de { 54925d9e2d1Snpiggin@suse.de loff_t oldsize; 55025d9e2d1Snpiggin@suse.de int error; 55125d9e2d1Snpiggin@suse.de 55225d9e2d1Snpiggin@suse.de error = inode_newsize_ok(inode, offset); 55325d9e2d1Snpiggin@suse.de if (error) 55425d9e2d1Snpiggin@suse.de return error; 55525d9e2d1Snpiggin@suse.de oldsize = inode->i_size; 55625d9e2d1Snpiggin@suse.de i_size_write(inode, offset); 55725d9e2d1Snpiggin@suse.de truncate_pagecache(inode, oldsize, offset); 55825d9e2d1Snpiggin@suse.de if (inode->i_op->truncate) 55925d9e2d1Snpiggin@suse.de inode->i_op->truncate(inode); 56025d9e2d1Snpiggin@suse.de 56125d9e2d1Snpiggin@suse.de return error; 56225d9e2d1Snpiggin@suse.de } 56325d9e2d1Snpiggin@suse.de EXPORT_SYMBOL(vmtruncate); 564