11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * mm/truncate.c - code for taking down pages from address_spaces 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2002, Linus Torvalds 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * 10Sep2002 akpm@zip.com.au 71da177e4SLinus Torvalds * Initial version. 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds 101da177e4SLinus Torvalds #include <linux/kernel.h> 111da177e4SLinus Torvalds #include <linux/mm.h> 121da177e4SLinus Torvalds #include <linux/module.h> 131da177e4SLinus Torvalds #include <linux/pagemap.h> 141da177e4SLinus Torvalds #include <linux/pagevec.h> 151da177e4SLinus Torvalds #include <linux/buffer_head.h> /* grr. try_to_release_page, 16aaa4059bSJan Kara do_invalidatepage */ 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds 191da177e4SLinus Torvalds static inline void truncate_partial_page(struct page *page, unsigned partial) 201da177e4SLinus Torvalds { 211da177e4SLinus Torvalds memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); 221da177e4SLinus Torvalds if (PagePrivate(page)) 231da177e4SLinus Torvalds do_invalidatepage(page, partial); 241da177e4SLinus Torvalds } 251da177e4SLinus Torvalds 261da177e4SLinus Torvalds /* 271da177e4SLinus Torvalds * If truncate cannot remove the fs-private metadata from the page, the page 281da177e4SLinus Torvalds * becomes anonymous. It will be left on the LRU and may even be mapped into 291da177e4SLinus Torvalds * user pagetables if we're racing with filemap_nopage(). 301da177e4SLinus Torvalds * 311da177e4SLinus Torvalds * We need to bale out if page->mapping is no longer equal to the original 321da177e4SLinus Torvalds * mapping. This happens a) when the VM reclaimed the page while we waited on 331da177e4SLinus Torvalds * its lock, b) when a concurrent invalidate_inode_pages got there first and 341da177e4SLinus Torvalds * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds static void 371da177e4SLinus Torvalds truncate_complete_page(struct address_space *mapping, struct page *page) 381da177e4SLinus Torvalds { 391da177e4SLinus Torvalds if (page->mapping != mapping) 401da177e4SLinus Torvalds return; 411da177e4SLinus Torvalds 421da177e4SLinus Torvalds if (PagePrivate(page)) 431da177e4SLinus Torvalds do_invalidatepage(page, 0); 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds clear_page_dirty(page); 461da177e4SLinus Torvalds ClearPageUptodate(page); 471da177e4SLinus Torvalds ClearPageMappedToDisk(page); 481da177e4SLinus Torvalds remove_from_page_cache(page); 491da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 501da177e4SLinus Torvalds } 511da177e4SLinus Torvalds 521da177e4SLinus Torvalds /* 531da177e4SLinus Torvalds * This is for invalidate_inode_pages(). That function can be called at 541da177e4SLinus Torvalds * any time, and is not supposed to throw away dirty pages. But pages can 551da177e4SLinus Torvalds * be marked dirty at any time too. So we re-check the dirtiness inside 561da177e4SLinus Torvalds * ->tree_lock. That provides exclusion against the __set_page_dirty 571da177e4SLinus Torvalds * functions. 581da177e4SLinus Torvalds * 591da177e4SLinus Torvalds * Returns non-zero if the page was successfully invalidated. 601da177e4SLinus Torvalds */ 611da177e4SLinus Torvalds static int 621da177e4SLinus Torvalds invalidate_complete_page(struct address_space *mapping, struct page *page) 631da177e4SLinus Torvalds { 641da177e4SLinus Torvalds if (page->mapping != mapping) 651da177e4SLinus Torvalds return 0; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds if (PagePrivate(page) && !try_to_release_page(page, 0)) 681da177e4SLinus Torvalds return 0; 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds write_lock_irq(&mapping->tree_lock); 711da177e4SLinus Torvalds if (PageDirty(page)) { 721da177e4SLinus Torvalds write_unlock_irq(&mapping->tree_lock); 731da177e4SLinus Torvalds return 0; 741da177e4SLinus Torvalds } 751da177e4SLinus Torvalds 761da177e4SLinus Torvalds BUG_ON(PagePrivate(page)); 771da177e4SLinus Torvalds __remove_from_page_cache(page); 781da177e4SLinus Torvalds write_unlock_irq(&mapping->tree_lock); 791da177e4SLinus Torvalds ClearPageUptodate(page); 801da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 811da177e4SLinus Torvalds return 1; 821da177e4SLinus Torvalds } 831da177e4SLinus Torvalds 841da177e4SLinus Torvalds /** 85d7339071SHans Reiser * truncate_inode_pages - truncate range of pages specified by start and 86d7339071SHans Reiser * end byte offsets 871da177e4SLinus Torvalds * @mapping: mapping to truncate 881da177e4SLinus Torvalds * @lstart: offset from which to truncate 89d7339071SHans Reiser * @lend: offset to which to truncate 901da177e4SLinus Torvalds * 91d7339071SHans Reiser * Truncate the page cache, removing the pages that are between 92d7339071SHans Reiser * specified offsets (and zeroing out partial page 93d7339071SHans Reiser * (if lstart is not page aligned)). 941da177e4SLinus Torvalds * 951da177e4SLinus Torvalds * Truncate takes two passes - the first pass is nonblocking. It will not 961da177e4SLinus Torvalds * block on page locks and it will not block on writeback. The second pass 971da177e4SLinus Torvalds * will wait. This is to prevent as much IO as possible in the affected region. 981da177e4SLinus Torvalds * The first pass will remove most pages, so the search cost of the second pass 991da177e4SLinus Torvalds * is low. 1001da177e4SLinus Torvalds * 1011da177e4SLinus Torvalds * When looking at page->index outside the page lock we need to be careful to 1021da177e4SLinus Torvalds * copy it into a local to avoid races (it could change at any time). 1031da177e4SLinus Torvalds * 1041da177e4SLinus Torvalds * We pass down the cache-hot hint to the page freeing code. Even if the 1051da177e4SLinus Torvalds * mapping is large, it is probably the case that the final pages are the most 1061da177e4SLinus Torvalds * recently touched, and freeing happens in ascending file offset order. 1071da177e4SLinus Torvalds */ 108d7339071SHans Reiser void truncate_inode_pages_range(struct address_space *mapping, 109d7339071SHans Reiser loff_t lstart, loff_t lend) 1101da177e4SLinus Torvalds { 1111da177e4SLinus Torvalds const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 112d7339071SHans Reiser pgoff_t end; 1131da177e4SLinus Torvalds const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 1141da177e4SLinus Torvalds struct pagevec pvec; 1151da177e4SLinus Torvalds pgoff_t next; 1161da177e4SLinus Torvalds int i; 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds if (mapping->nrpages == 0) 1191da177e4SLinus Torvalds return; 1201da177e4SLinus Torvalds 121d7339071SHans Reiser BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); 122d7339071SHans Reiser end = (lend >> PAGE_CACHE_SHIFT); 123d7339071SHans Reiser 1241da177e4SLinus Torvalds pagevec_init(&pvec, 0); 1251da177e4SLinus Torvalds next = start; 126d7339071SHans Reiser while (next <= end && 127d7339071SHans Reiser pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1281da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1291da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1301da177e4SLinus Torvalds pgoff_t page_index = page->index; 1311da177e4SLinus Torvalds 132d7339071SHans Reiser if (page_index > end) { 133d7339071SHans Reiser next = page_index; 134d7339071SHans Reiser break; 135d7339071SHans Reiser } 136d7339071SHans Reiser 1371da177e4SLinus Torvalds if (page_index > next) 1381da177e4SLinus Torvalds next = page_index; 1391da177e4SLinus Torvalds next++; 1401da177e4SLinus Torvalds if (TestSetPageLocked(page)) 1411da177e4SLinus Torvalds continue; 1421da177e4SLinus Torvalds if (PageWriteback(page)) { 1431da177e4SLinus Torvalds unlock_page(page); 1441da177e4SLinus Torvalds continue; 1451da177e4SLinus Torvalds } 1461da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1471da177e4SLinus Torvalds unlock_page(page); 1481da177e4SLinus Torvalds } 1491da177e4SLinus Torvalds pagevec_release(&pvec); 1501da177e4SLinus Torvalds cond_resched(); 1511da177e4SLinus Torvalds } 1521da177e4SLinus Torvalds 1531da177e4SLinus Torvalds if (partial) { 1541da177e4SLinus Torvalds struct page *page = find_lock_page(mapping, start - 1); 1551da177e4SLinus Torvalds if (page) { 1561da177e4SLinus Torvalds wait_on_page_writeback(page); 1571da177e4SLinus Torvalds truncate_partial_page(page, partial); 1581da177e4SLinus Torvalds unlock_page(page); 1591da177e4SLinus Torvalds page_cache_release(page); 1601da177e4SLinus Torvalds } 1611da177e4SLinus Torvalds } 1621da177e4SLinus Torvalds 1631da177e4SLinus Torvalds next = start; 1641da177e4SLinus Torvalds for ( ; ; ) { 1651da177e4SLinus Torvalds cond_resched(); 1661da177e4SLinus Torvalds if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1671da177e4SLinus Torvalds if (next == start) 1681da177e4SLinus Torvalds break; 1691da177e4SLinus Torvalds next = start; 1701da177e4SLinus Torvalds continue; 1711da177e4SLinus Torvalds } 172d7339071SHans Reiser if (pvec.pages[0]->index > end) { 173d7339071SHans Reiser pagevec_release(&pvec); 174d7339071SHans Reiser break; 175d7339071SHans Reiser } 1761da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1771da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1781da177e4SLinus Torvalds 179d7339071SHans Reiser if (page->index > end) 180d7339071SHans Reiser break; 1811da177e4SLinus Torvalds lock_page(page); 1821da177e4SLinus Torvalds wait_on_page_writeback(page); 1831da177e4SLinus Torvalds if (page->index > next) 1841da177e4SLinus Torvalds next = page->index; 1851da177e4SLinus Torvalds next++; 1861da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1871da177e4SLinus Torvalds unlock_page(page); 1881da177e4SLinus Torvalds } 1891da177e4SLinus Torvalds pagevec_release(&pvec); 1901da177e4SLinus Torvalds } 1911da177e4SLinus Torvalds } 192d7339071SHans Reiser EXPORT_SYMBOL(truncate_inode_pages_range); 1931da177e4SLinus Torvalds 194d7339071SHans Reiser /** 195d7339071SHans Reiser * truncate_inode_pages - truncate *all* the pages from an offset 196d7339071SHans Reiser * @mapping: mapping to truncate 197d7339071SHans Reiser * @lstart: offset from which to truncate 198d7339071SHans Reiser * 199*1b1dcc1bSJes Sorensen * Called under (and serialised by) inode->i_mutex. 200d7339071SHans Reiser */ 201d7339071SHans Reiser void truncate_inode_pages(struct address_space *mapping, loff_t lstart) 202d7339071SHans Reiser { 203d7339071SHans Reiser truncate_inode_pages_range(mapping, lstart, (loff_t)-1); 204d7339071SHans Reiser } 2051da177e4SLinus Torvalds EXPORT_SYMBOL(truncate_inode_pages); 2061da177e4SLinus Torvalds 2071da177e4SLinus Torvalds /** 2081da177e4SLinus Torvalds * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode 2091da177e4SLinus Torvalds * @mapping: the address_space which holds the pages to invalidate 2101da177e4SLinus Torvalds * @start: the offset 'from' which to invalidate 2111da177e4SLinus Torvalds * @end: the offset 'to' which to invalidate (inclusive) 2121da177e4SLinus Torvalds * 2131da177e4SLinus Torvalds * This function only removes the unlocked pages, if you want to 2141da177e4SLinus Torvalds * remove all the pages of one inode, you must call truncate_inode_pages. 2151da177e4SLinus Torvalds * 2161da177e4SLinus Torvalds * invalidate_mapping_pages() will not block on IO activity. It will not 2171da177e4SLinus Torvalds * invalidate pages which are dirty, locked, under writeback or mapped into 2181da177e4SLinus Torvalds * pagetables. 2191da177e4SLinus Torvalds */ 2201da177e4SLinus Torvalds unsigned long invalidate_mapping_pages(struct address_space *mapping, 2211da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 2221da177e4SLinus Torvalds { 2231da177e4SLinus Torvalds struct pagevec pvec; 2241da177e4SLinus Torvalds pgoff_t next = start; 2251da177e4SLinus Torvalds unsigned long ret = 0; 2261da177e4SLinus Torvalds int i; 2271da177e4SLinus Torvalds 2281da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2291da177e4SLinus Torvalds while (next <= end && 2301da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 2311da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2321da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2331da177e4SLinus Torvalds 2341da177e4SLinus Torvalds if (TestSetPageLocked(page)) { 2351da177e4SLinus Torvalds next++; 2361da177e4SLinus Torvalds continue; 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds if (page->index > next) 2391da177e4SLinus Torvalds next = page->index; 2401da177e4SLinus Torvalds next++; 2411da177e4SLinus Torvalds if (PageDirty(page) || PageWriteback(page)) 2421da177e4SLinus Torvalds goto unlock; 2431da177e4SLinus Torvalds if (page_mapped(page)) 2441da177e4SLinus Torvalds goto unlock; 2451da177e4SLinus Torvalds ret += invalidate_complete_page(mapping, page); 2461da177e4SLinus Torvalds unlock: 2471da177e4SLinus Torvalds unlock_page(page); 2481da177e4SLinus Torvalds if (next > end) 2491da177e4SLinus Torvalds break; 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds pagevec_release(&pvec); 2521da177e4SLinus Torvalds } 2531da177e4SLinus Torvalds return ret; 2541da177e4SLinus Torvalds } 2551da177e4SLinus Torvalds 2561da177e4SLinus Torvalds unsigned long invalidate_inode_pages(struct address_space *mapping) 2571da177e4SLinus Torvalds { 2581da177e4SLinus Torvalds return invalidate_mapping_pages(mapping, 0, ~0UL); 2591da177e4SLinus Torvalds } 2601da177e4SLinus Torvalds 2611da177e4SLinus Torvalds EXPORT_SYMBOL(invalidate_inode_pages); 2621da177e4SLinus Torvalds 2631da177e4SLinus Torvalds /** 2641da177e4SLinus Torvalds * invalidate_inode_pages2_range - remove range of pages from an address_space 26567be2dd1SMartin Waitz * @mapping: the address_space 2661da177e4SLinus Torvalds * @start: the page offset 'from' which to invalidate 2671da177e4SLinus Torvalds * @end: the page offset 'to' which to invalidate (inclusive) 2681da177e4SLinus Torvalds * 2691da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 2701da177e4SLinus Torvalds * invalidation. 2711da177e4SLinus Torvalds * 2721da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 2731da177e4SLinus Torvalds */ 2741da177e4SLinus Torvalds int invalidate_inode_pages2_range(struct address_space *mapping, 2751da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 2761da177e4SLinus Torvalds { 2771da177e4SLinus Torvalds struct pagevec pvec; 2781da177e4SLinus Torvalds pgoff_t next; 2791da177e4SLinus Torvalds int i; 2801da177e4SLinus Torvalds int ret = 0; 2811da177e4SLinus Torvalds int did_range_unmap = 0; 2821da177e4SLinus Torvalds int wrapped = 0; 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2851da177e4SLinus Torvalds next = start; 2861da177e4SLinus Torvalds while (next <= end && !ret && !wrapped && 2871da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, 2881da177e4SLinus Torvalds min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 2891da177e4SLinus Torvalds for (i = 0; !ret && i < pagevec_count(&pvec); i++) { 2901da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2911da177e4SLinus Torvalds pgoff_t page_index; 2921da177e4SLinus Torvalds int was_dirty; 2931da177e4SLinus Torvalds 2941da177e4SLinus Torvalds lock_page(page); 2951da177e4SLinus Torvalds if (page->mapping != mapping) { 2961da177e4SLinus Torvalds unlock_page(page); 2971da177e4SLinus Torvalds continue; 2981da177e4SLinus Torvalds } 2991da177e4SLinus Torvalds page_index = page->index; 3001da177e4SLinus Torvalds next = page_index + 1; 3011da177e4SLinus Torvalds if (next == 0) 3021da177e4SLinus Torvalds wrapped = 1; 3031da177e4SLinus Torvalds if (page_index > end) { 3041da177e4SLinus Torvalds unlock_page(page); 3051da177e4SLinus Torvalds break; 3061da177e4SLinus Torvalds } 3071da177e4SLinus Torvalds wait_on_page_writeback(page); 3081da177e4SLinus Torvalds while (page_mapped(page)) { 3091da177e4SLinus Torvalds if (!did_range_unmap) { 3101da177e4SLinus Torvalds /* 3111da177e4SLinus Torvalds * Zap the rest of the file in one hit. 3121da177e4SLinus Torvalds */ 3131da177e4SLinus Torvalds unmap_mapping_range(mapping, 314479ef592SOleg Drokin (loff_t)page_index<<PAGE_CACHE_SHIFT, 315479ef592SOleg Drokin (loff_t)(end - page_index + 1) 3161da177e4SLinus Torvalds << PAGE_CACHE_SHIFT, 3171da177e4SLinus Torvalds 0); 3181da177e4SLinus Torvalds did_range_unmap = 1; 3191da177e4SLinus Torvalds } else { 3201da177e4SLinus Torvalds /* 3211da177e4SLinus Torvalds * Just zap this page 3221da177e4SLinus Torvalds */ 3231da177e4SLinus Torvalds unmap_mapping_range(mapping, 324479ef592SOleg Drokin (loff_t)page_index<<PAGE_CACHE_SHIFT, 3251da177e4SLinus Torvalds PAGE_CACHE_SIZE, 0); 3261da177e4SLinus Torvalds } 3271da177e4SLinus Torvalds } 3281da177e4SLinus Torvalds was_dirty = test_clear_page_dirty(page); 3291da177e4SLinus Torvalds if (!invalidate_complete_page(mapping, page)) { 3301da177e4SLinus Torvalds if (was_dirty) 3311da177e4SLinus Torvalds set_page_dirty(page); 3321da177e4SLinus Torvalds ret = -EIO; 3331da177e4SLinus Torvalds } 3341da177e4SLinus Torvalds unlock_page(page); 3351da177e4SLinus Torvalds } 3361da177e4SLinus Torvalds pagevec_release(&pvec); 3371da177e4SLinus Torvalds cond_resched(); 3381da177e4SLinus Torvalds } 3391da177e4SLinus Torvalds return ret; 3401da177e4SLinus Torvalds } 3411da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); 3421da177e4SLinus Torvalds 3431da177e4SLinus Torvalds /** 3441da177e4SLinus Torvalds * invalidate_inode_pages2 - remove all pages from an address_space 34567be2dd1SMartin Waitz * @mapping: the address_space 3461da177e4SLinus Torvalds * 3471da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 3481da177e4SLinus Torvalds * invalidation. 3491da177e4SLinus Torvalds * 3501da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 3511da177e4SLinus Torvalds */ 3521da177e4SLinus Torvalds int invalidate_inode_pages2(struct address_space *mapping) 3531da177e4SLinus Torvalds { 3541da177e4SLinus Torvalds return invalidate_inode_pages2_range(mapping, 0, -1); 3551da177e4SLinus Torvalds } 3561da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 357