11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * mm/truncate.c - code for taking down pages from address_spaces 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2002, Linus Torvalds 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * 10Sep2002 akpm@zip.com.au 71da177e4SLinus Torvalds * Initial version. 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds 101da177e4SLinus Torvalds #include <linux/kernel.h> 111da177e4SLinus Torvalds #include <linux/mm.h> 120fd0e6b0SNick Piggin #include <linux/swap.h> 131da177e4SLinus Torvalds #include <linux/module.h> 141da177e4SLinus Torvalds #include <linux/pagemap.h> 151da177e4SLinus Torvalds #include <linux/pagevec.h> 161da177e4SLinus Torvalds #include <linux/buffer_head.h> /* grr. try_to_release_page, 17aaa4059bSJan Kara do_invalidatepage */ 181da177e4SLinus Torvalds 191da177e4SLinus Torvalds 20*cf9a2ae8SDavid Howells /** 21*cf9a2ae8SDavid Howells * do_invalidatepage - invalidate part of all of a page 22*cf9a2ae8SDavid Howells * @page: the page which is affected 23*cf9a2ae8SDavid Howells * @offset: the index of the truncation point 24*cf9a2ae8SDavid Howells * 25*cf9a2ae8SDavid Howells * do_invalidatepage() is called when all or part of the page has become 26*cf9a2ae8SDavid Howells * invalidated by a truncate operation. 27*cf9a2ae8SDavid Howells * 28*cf9a2ae8SDavid Howells * do_invalidatepage() does not have to release all buffers, but it must 29*cf9a2ae8SDavid Howells * ensure that no dirty buffer is left outside @offset and that no I/O 30*cf9a2ae8SDavid Howells * is underway against any of the blocks which are outside the truncation 31*cf9a2ae8SDavid Howells * point. Because the caller is about to free (and possibly reuse) those 32*cf9a2ae8SDavid Howells * blocks on-disk. 33*cf9a2ae8SDavid Howells */ 34*cf9a2ae8SDavid Howells void do_invalidatepage(struct page *page, unsigned long offset) 35*cf9a2ae8SDavid Howells { 36*cf9a2ae8SDavid Howells void (*invalidatepage)(struct page *, unsigned long); 37*cf9a2ae8SDavid Howells invalidatepage = page->mapping->a_ops->invalidatepage; 38*cf9a2ae8SDavid Howells if (!invalidatepage) 39*cf9a2ae8SDavid Howells invalidatepage = block_invalidatepage; 40*cf9a2ae8SDavid Howells if (invalidatepage) 41*cf9a2ae8SDavid Howells (*invalidatepage)(page, offset); 42*cf9a2ae8SDavid Howells } 43*cf9a2ae8SDavid Howells 441da177e4SLinus Torvalds static inline void truncate_partial_page(struct page *page, unsigned partial) 451da177e4SLinus Torvalds { 461da177e4SLinus Torvalds memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); 471da177e4SLinus Torvalds if (PagePrivate(page)) 481da177e4SLinus Torvalds do_invalidatepage(page, partial); 491da177e4SLinus Torvalds } 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds /* 521da177e4SLinus Torvalds * If truncate cannot remove the fs-private metadata from the page, the page 531da177e4SLinus Torvalds * becomes anonymous. It will be left on the LRU and may even be mapped into 541da177e4SLinus Torvalds * user pagetables if we're racing with filemap_nopage(). 551da177e4SLinus Torvalds * 561da177e4SLinus Torvalds * We need to bale out if page->mapping is no longer equal to the original 571da177e4SLinus Torvalds * mapping. This happens a) when the VM reclaimed the page while we waited on 581da177e4SLinus Torvalds * its lock, b) when a concurrent invalidate_inode_pages got there first and 591da177e4SLinus Torvalds * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 601da177e4SLinus Torvalds */ 611da177e4SLinus Torvalds static void 621da177e4SLinus Torvalds truncate_complete_page(struct address_space *mapping, struct page *page) 631da177e4SLinus Torvalds { 641da177e4SLinus Torvalds if (page->mapping != mapping) 651da177e4SLinus Torvalds return; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds if (PagePrivate(page)) 681da177e4SLinus Torvalds do_invalidatepage(page, 0); 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds clear_page_dirty(page); 711da177e4SLinus Torvalds ClearPageUptodate(page); 721da177e4SLinus Torvalds ClearPageMappedToDisk(page); 731da177e4SLinus Torvalds remove_from_page_cache(page); 741da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 751da177e4SLinus Torvalds } 761da177e4SLinus Torvalds 771da177e4SLinus Torvalds /* 781da177e4SLinus Torvalds * This is for invalidate_inode_pages(). That function can be called at 791da177e4SLinus Torvalds * any time, and is not supposed to throw away dirty pages. But pages can 800fd0e6b0SNick Piggin * be marked dirty at any time too, so use remove_mapping which safely 810fd0e6b0SNick Piggin * discards clean, unused pages. 821da177e4SLinus Torvalds * 831da177e4SLinus Torvalds * Returns non-zero if the page was successfully invalidated. 841da177e4SLinus Torvalds */ 851da177e4SLinus Torvalds static int 861da177e4SLinus Torvalds invalidate_complete_page(struct address_space *mapping, struct page *page) 871da177e4SLinus Torvalds { 880fd0e6b0SNick Piggin int ret; 890fd0e6b0SNick Piggin 901da177e4SLinus Torvalds if (page->mapping != mapping) 911da177e4SLinus Torvalds return 0; 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds if (PagePrivate(page) && !try_to_release_page(page, 0)) 941da177e4SLinus Torvalds return 0; 951da177e4SLinus Torvalds 960fd0e6b0SNick Piggin ret = remove_mapping(mapping, page); 971da177e4SLinus Torvalds ClearPageUptodate(page); 980fd0e6b0SNick Piggin 990fd0e6b0SNick Piggin return ret; 1001da177e4SLinus Torvalds } 1011da177e4SLinus Torvalds 1021da177e4SLinus Torvalds /** 103d7339071SHans Reiser * truncate_inode_pages - truncate range of pages specified by start and 104d7339071SHans Reiser * end byte offsets 1051da177e4SLinus Torvalds * @mapping: mapping to truncate 1061da177e4SLinus Torvalds * @lstart: offset from which to truncate 107d7339071SHans Reiser * @lend: offset to which to truncate 1081da177e4SLinus Torvalds * 109d7339071SHans Reiser * Truncate the page cache, removing the pages that are between 110d7339071SHans Reiser * specified offsets (and zeroing out partial page 111d7339071SHans Reiser * (if lstart is not page aligned)). 1121da177e4SLinus Torvalds * 1131da177e4SLinus Torvalds * Truncate takes two passes - the first pass is nonblocking. It will not 1141da177e4SLinus Torvalds * block on page locks and it will not block on writeback. The second pass 1151da177e4SLinus Torvalds * will wait. This is to prevent as much IO as possible in the affected region. 1161da177e4SLinus Torvalds * The first pass will remove most pages, so the search cost of the second pass 1171da177e4SLinus Torvalds * is low. 1181da177e4SLinus Torvalds * 1191da177e4SLinus Torvalds * When looking at page->index outside the page lock we need to be careful to 1201da177e4SLinus Torvalds * copy it into a local to avoid races (it could change at any time). 1211da177e4SLinus Torvalds * 1221da177e4SLinus Torvalds * We pass down the cache-hot hint to the page freeing code. Even if the 1231da177e4SLinus Torvalds * mapping is large, it is probably the case that the final pages are the most 1241da177e4SLinus Torvalds * recently touched, and freeing happens in ascending file offset order. 1251da177e4SLinus Torvalds */ 126d7339071SHans Reiser void truncate_inode_pages_range(struct address_space *mapping, 127d7339071SHans Reiser loff_t lstart, loff_t lend) 1281da177e4SLinus Torvalds { 1291da177e4SLinus Torvalds const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 130d7339071SHans Reiser pgoff_t end; 1311da177e4SLinus Torvalds const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 1321da177e4SLinus Torvalds struct pagevec pvec; 1331da177e4SLinus Torvalds pgoff_t next; 1341da177e4SLinus Torvalds int i; 1351da177e4SLinus Torvalds 1361da177e4SLinus Torvalds if (mapping->nrpages == 0) 1371da177e4SLinus Torvalds return; 1381da177e4SLinus Torvalds 139d7339071SHans Reiser BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); 140d7339071SHans Reiser end = (lend >> PAGE_CACHE_SHIFT); 141d7339071SHans Reiser 1421da177e4SLinus Torvalds pagevec_init(&pvec, 0); 1431da177e4SLinus Torvalds next = start; 144d7339071SHans Reiser while (next <= end && 145d7339071SHans Reiser pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1461da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1471da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1481da177e4SLinus Torvalds pgoff_t page_index = page->index; 1491da177e4SLinus Torvalds 150d7339071SHans Reiser if (page_index > end) { 151d7339071SHans Reiser next = page_index; 152d7339071SHans Reiser break; 153d7339071SHans Reiser } 154d7339071SHans Reiser 1551da177e4SLinus Torvalds if (page_index > next) 1561da177e4SLinus Torvalds next = page_index; 1571da177e4SLinus Torvalds next++; 1581da177e4SLinus Torvalds if (TestSetPageLocked(page)) 1591da177e4SLinus Torvalds continue; 1601da177e4SLinus Torvalds if (PageWriteback(page)) { 1611da177e4SLinus Torvalds unlock_page(page); 1621da177e4SLinus Torvalds continue; 1631da177e4SLinus Torvalds } 1641da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1651da177e4SLinus Torvalds unlock_page(page); 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds pagevec_release(&pvec); 1681da177e4SLinus Torvalds cond_resched(); 1691da177e4SLinus Torvalds } 1701da177e4SLinus Torvalds 1711da177e4SLinus Torvalds if (partial) { 1721da177e4SLinus Torvalds struct page *page = find_lock_page(mapping, start - 1); 1731da177e4SLinus Torvalds if (page) { 1741da177e4SLinus Torvalds wait_on_page_writeback(page); 1751da177e4SLinus Torvalds truncate_partial_page(page, partial); 1761da177e4SLinus Torvalds unlock_page(page); 1771da177e4SLinus Torvalds page_cache_release(page); 1781da177e4SLinus Torvalds } 1791da177e4SLinus Torvalds } 1801da177e4SLinus Torvalds 1811da177e4SLinus Torvalds next = start; 1821da177e4SLinus Torvalds for ( ; ; ) { 1831da177e4SLinus Torvalds cond_resched(); 1841da177e4SLinus Torvalds if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1851da177e4SLinus Torvalds if (next == start) 1861da177e4SLinus Torvalds break; 1871da177e4SLinus Torvalds next = start; 1881da177e4SLinus Torvalds continue; 1891da177e4SLinus Torvalds } 190d7339071SHans Reiser if (pvec.pages[0]->index > end) { 191d7339071SHans Reiser pagevec_release(&pvec); 192d7339071SHans Reiser break; 193d7339071SHans Reiser } 1941da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1951da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1961da177e4SLinus Torvalds 197d7339071SHans Reiser if (page->index > end) 198d7339071SHans Reiser break; 1991da177e4SLinus Torvalds lock_page(page); 2001da177e4SLinus Torvalds wait_on_page_writeback(page); 2011da177e4SLinus Torvalds if (page->index > next) 2021da177e4SLinus Torvalds next = page->index; 2031da177e4SLinus Torvalds next++; 2041da177e4SLinus Torvalds truncate_complete_page(mapping, page); 2051da177e4SLinus Torvalds unlock_page(page); 2061da177e4SLinus Torvalds } 2071da177e4SLinus Torvalds pagevec_release(&pvec); 2081da177e4SLinus Torvalds } 2091da177e4SLinus Torvalds } 210d7339071SHans Reiser EXPORT_SYMBOL(truncate_inode_pages_range); 2111da177e4SLinus Torvalds 212d7339071SHans Reiser /** 213d7339071SHans Reiser * truncate_inode_pages - truncate *all* the pages from an offset 214d7339071SHans Reiser * @mapping: mapping to truncate 215d7339071SHans Reiser * @lstart: offset from which to truncate 216d7339071SHans Reiser * 2171b1dcc1bSJes Sorensen * Called under (and serialised by) inode->i_mutex. 218d7339071SHans Reiser */ 219d7339071SHans Reiser void truncate_inode_pages(struct address_space *mapping, loff_t lstart) 220d7339071SHans Reiser { 221d7339071SHans Reiser truncate_inode_pages_range(mapping, lstart, (loff_t)-1); 222d7339071SHans Reiser } 2231da177e4SLinus Torvalds EXPORT_SYMBOL(truncate_inode_pages); 2241da177e4SLinus Torvalds 2251da177e4SLinus Torvalds /** 2261da177e4SLinus Torvalds * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode 2271da177e4SLinus Torvalds * @mapping: the address_space which holds the pages to invalidate 2281da177e4SLinus Torvalds * @start: the offset 'from' which to invalidate 2291da177e4SLinus Torvalds * @end: the offset 'to' which to invalidate (inclusive) 2301da177e4SLinus Torvalds * 2311da177e4SLinus Torvalds * This function only removes the unlocked pages, if you want to 2321da177e4SLinus Torvalds * remove all the pages of one inode, you must call truncate_inode_pages. 2331da177e4SLinus Torvalds * 2341da177e4SLinus Torvalds * invalidate_mapping_pages() will not block on IO activity. It will not 2351da177e4SLinus Torvalds * invalidate pages which are dirty, locked, under writeback or mapped into 2361da177e4SLinus Torvalds * pagetables. 2371da177e4SLinus Torvalds */ 2381da177e4SLinus Torvalds unsigned long invalidate_mapping_pages(struct address_space *mapping, 2391da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 2401da177e4SLinus Torvalds { 2411da177e4SLinus Torvalds struct pagevec pvec; 2421da177e4SLinus Torvalds pgoff_t next = start; 2431da177e4SLinus Torvalds unsigned long ret = 0; 2441da177e4SLinus Torvalds int i; 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2471da177e4SLinus Torvalds while (next <= end && 2481da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 2491da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2501da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 251e0f23603SNeilBrown pgoff_t index; 252e0f23603SNeilBrown int lock_failed; 2531da177e4SLinus Torvalds 254e0f23603SNeilBrown lock_failed = TestSetPageLocked(page); 255e0f23603SNeilBrown 256e0f23603SNeilBrown /* 257e0f23603SNeilBrown * We really shouldn't be looking at the ->index of an 258e0f23603SNeilBrown * unlocked page. But we're not allowed to lock these 259e0f23603SNeilBrown * pages. So we rely upon nobody altering the ->index 260e0f23603SNeilBrown * of this (pinned-by-us) page. 261e0f23603SNeilBrown */ 262e0f23603SNeilBrown index = page->index; 263e0f23603SNeilBrown if (index > next) 264e0f23603SNeilBrown next = index; 2651da177e4SLinus Torvalds next++; 266e0f23603SNeilBrown if (lock_failed) 2671da177e4SLinus Torvalds continue; 268e0f23603SNeilBrown 2691da177e4SLinus Torvalds if (PageDirty(page) || PageWriteback(page)) 2701da177e4SLinus Torvalds goto unlock; 2711da177e4SLinus Torvalds if (page_mapped(page)) 2721da177e4SLinus Torvalds goto unlock; 2731da177e4SLinus Torvalds ret += invalidate_complete_page(mapping, page); 2741da177e4SLinus Torvalds unlock: 2751da177e4SLinus Torvalds unlock_page(page); 2761da177e4SLinus Torvalds if (next > end) 2771da177e4SLinus Torvalds break; 2781da177e4SLinus Torvalds } 2791da177e4SLinus Torvalds pagevec_release(&pvec); 2801da177e4SLinus Torvalds } 2811da177e4SLinus Torvalds return ret; 2821da177e4SLinus Torvalds } 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds unsigned long invalidate_inode_pages(struct address_space *mapping) 2851da177e4SLinus Torvalds { 2861da177e4SLinus Torvalds return invalidate_mapping_pages(mapping, 0, ~0UL); 2871da177e4SLinus Torvalds } 2881da177e4SLinus Torvalds 2891da177e4SLinus Torvalds EXPORT_SYMBOL(invalidate_inode_pages); 2901da177e4SLinus Torvalds 2911da177e4SLinus Torvalds /** 2921da177e4SLinus Torvalds * invalidate_inode_pages2_range - remove range of pages from an address_space 29367be2dd1SMartin Waitz * @mapping: the address_space 2941da177e4SLinus Torvalds * @start: the page offset 'from' which to invalidate 2951da177e4SLinus Torvalds * @end: the page offset 'to' which to invalidate (inclusive) 2961da177e4SLinus Torvalds * 2971da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 2981da177e4SLinus Torvalds * invalidation. 2991da177e4SLinus Torvalds * 3001da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 3011da177e4SLinus Torvalds */ 3021da177e4SLinus Torvalds int invalidate_inode_pages2_range(struct address_space *mapping, 3031da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 3041da177e4SLinus Torvalds { 3051da177e4SLinus Torvalds struct pagevec pvec; 3061da177e4SLinus Torvalds pgoff_t next; 3071da177e4SLinus Torvalds int i; 3081da177e4SLinus Torvalds int ret = 0; 3091da177e4SLinus Torvalds int did_range_unmap = 0; 3101da177e4SLinus Torvalds int wrapped = 0; 3111da177e4SLinus Torvalds 3121da177e4SLinus Torvalds pagevec_init(&pvec, 0); 3131da177e4SLinus Torvalds next = start; 3141da177e4SLinus Torvalds while (next <= end && !ret && !wrapped && 3151da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, 3161da177e4SLinus Torvalds min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 3171da177e4SLinus Torvalds for (i = 0; !ret && i < pagevec_count(&pvec); i++) { 3181da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 3191da177e4SLinus Torvalds pgoff_t page_index; 3201da177e4SLinus Torvalds int was_dirty; 3211da177e4SLinus Torvalds 3221da177e4SLinus Torvalds lock_page(page); 3231da177e4SLinus Torvalds if (page->mapping != mapping) { 3241da177e4SLinus Torvalds unlock_page(page); 3251da177e4SLinus Torvalds continue; 3261da177e4SLinus Torvalds } 3271da177e4SLinus Torvalds page_index = page->index; 3281da177e4SLinus Torvalds next = page_index + 1; 3291da177e4SLinus Torvalds if (next == 0) 3301da177e4SLinus Torvalds wrapped = 1; 3311da177e4SLinus Torvalds if (page_index > end) { 3321da177e4SLinus Torvalds unlock_page(page); 3331da177e4SLinus Torvalds break; 3341da177e4SLinus Torvalds } 3351da177e4SLinus Torvalds wait_on_page_writeback(page); 3361da177e4SLinus Torvalds while (page_mapped(page)) { 3371da177e4SLinus Torvalds if (!did_range_unmap) { 3381da177e4SLinus Torvalds /* 3391da177e4SLinus Torvalds * Zap the rest of the file in one hit. 3401da177e4SLinus Torvalds */ 3411da177e4SLinus Torvalds unmap_mapping_range(mapping, 342479ef592SOleg Drokin (loff_t)page_index<<PAGE_CACHE_SHIFT, 343479ef592SOleg Drokin (loff_t)(end - page_index + 1) 3441da177e4SLinus Torvalds << PAGE_CACHE_SHIFT, 3451da177e4SLinus Torvalds 0); 3461da177e4SLinus Torvalds did_range_unmap = 1; 3471da177e4SLinus Torvalds } else { 3481da177e4SLinus Torvalds /* 3491da177e4SLinus Torvalds * Just zap this page 3501da177e4SLinus Torvalds */ 3511da177e4SLinus Torvalds unmap_mapping_range(mapping, 352479ef592SOleg Drokin (loff_t)page_index<<PAGE_CACHE_SHIFT, 3531da177e4SLinus Torvalds PAGE_CACHE_SIZE, 0); 3541da177e4SLinus Torvalds } 3551da177e4SLinus Torvalds } 3561da177e4SLinus Torvalds was_dirty = test_clear_page_dirty(page); 3571da177e4SLinus Torvalds if (!invalidate_complete_page(mapping, page)) { 3581da177e4SLinus Torvalds if (was_dirty) 3591da177e4SLinus Torvalds set_page_dirty(page); 3601da177e4SLinus Torvalds ret = -EIO; 3611da177e4SLinus Torvalds } 3621da177e4SLinus Torvalds unlock_page(page); 3631da177e4SLinus Torvalds } 3641da177e4SLinus Torvalds pagevec_release(&pvec); 3651da177e4SLinus Torvalds cond_resched(); 3661da177e4SLinus Torvalds } 3671da177e4SLinus Torvalds return ret; 3681da177e4SLinus Torvalds } 3691da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); 3701da177e4SLinus Torvalds 3711da177e4SLinus Torvalds /** 3721da177e4SLinus Torvalds * invalidate_inode_pages2 - remove all pages from an address_space 37367be2dd1SMartin Waitz * @mapping: the address_space 3741da177e4SLinus Torvalds * 3751da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 3761da177e4SLinus Torvalds * invalidation. 3771da177e4SLinus Torvalds * 3781da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 3791da177e4SLinus Torvalds */ 3801da177e4SLinus Torvalds int invalidate_inode_pages2(struct address_space *mapping) 3811da177e4SLinus Torvalds { 3821da177e4SLinus Torvalds return invalidate_inode_pages2_range(mapping, 0, -1); 3831da177e4SLinus Torvalds } 3841da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 385