11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * mm/truncate.c - code for taking down pages from address_spaces 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2002, Linus Torvalds 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * 10Sep2002 akpm@zip.com.au 71da177e4SLinus Torvalds * Initial version. 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds 101da177e4SLinus Torvalds #include <linux/kernel.h> 111da177e4SLinus Torvalds #include <linux/mm.h> 121da177e4SLinus Torvalds #include <linux/module.h> 131da177e4SLinus Torvalds #include <linux/pagemap.h> 141da177e4SLinus Torvalds #include <linux/pagevec.h> 151da177e4SLinus Torvalds #include <linux/buffer_head.h> /* grr. try_to_release_page, 16*aaa4059bSJan Kara do_invalidatepage */ 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds 191da177e4SLinus Torvalds static inline void truncate_partial_page(struct page *page, unsigned partial) 201da177e4SLinus Torvalds { 211da177e4SLinus Torvalds memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); 221da177e4SLinus Torvalds if (PagePrivate(page)) 231da177e4SLinus Torvalds do_invalidatepage(page, partial); 241da177e4SLinus Torvalds } 251da177e4SLinus Torvalds 261da177e4SLinus Torvalds /* 271da177e4SLinus Torvalds * If truncate cannot remove the fs-private metadata from the page, the page 281da177e4SLinus Torvalds * becomes anonymous. It will be left on the LRU and may even be mapped into 291da177e4SLinus Torvalds * user pagetables if we're racing with filemap_nopage(). 301da177e4SLinus Torvalds * 311da177e4SLinus Torvalds * We need to bale out if page->mapping is no longer equal to the original 321da177e4SLinus Torvalds * mapping. This happens a) when the VM reclaimed the page while we waited on 331da177e4SLinus Torvalds * its lock, b) when a concurrent invalidate_inode_pages got there first and 341da177e4SLinus Torvalds * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds static void 371da177e4SLinus Torvalds truncate_complete_page(struct address_space *mapping, struct page *page) 381da177e4SLinus Torvalds { 391da177e4SLinus Torvalds if (page->mapping != mapping) 401da177e4SLinus Torvalds return; 411da177e4SLinus Torvalds 421da177e4SLinus Torvalds if (PagePrivate(page)) 431da177e4SLinus Torvalds do_invalidatepage(page, 0); 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds clear_page_dirty(page); 461da177e4SLinus Torvalds ClearPageUptodate(page); 471da177e4SLinus Torvalds ClearPageMappedToDisk(page); 481da177e4SLinus Torvalds remove_from_page_cache(page); 491da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 501da177e4SLinus Torvalds } 511da177e4SLinus Torvalds 521da177e4SLinus Torvalds /* 531da177e4SLinus Torvalds * This is for invalidate_inode_pages(). That function can be called at 541da177e4SLinus Torvalds * any time, and is not supposed to throw away dirty pages. But pages can 551da177e4SLinus Torvalds * be marked dirty at any time too. So we re-check the dirtiness inside 561da177e4SLinus Torvalds * ->tree_lock. That provides exclusion against the __set_page_dirty 571da177e4SLinus Torvalds * functions. 581da177e4SLinus Torvalds * 591da177e4SLinus Torvalds * Returns non-zero if the page was successfully invalidated. 601da177e4SLinus Torvalds */ 611da177e4SLinus Torvalds static int 621da177e4SLinus Torvalds invalidate_complete_page(struct address_space *mapping, struct page *page) 631da177e4SLinus Torvalds { 641da177e4SLinus Torvalds if (page->mapping != mapping) 651da177e4SLinus Torvalds return 0; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds if (PagePrivate(page) && !try_to_release_page(page, 0)) 681da177e4SLinus Torvalds return 0; 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds write_lock_irq(&mapping->tree_lock); 711da177e4SLinus Torvalds if (PageDirty(page)) { 721da177e4SLinus Torvalds write_unlock_irq(&mapping->tree_lock); 731da177e4SLinus Torvalds return 0; 741da177e4SLinus Torvalds } 751da177e4SLinus Torvalds 761da177e4SLinus Torvalds BUG_ON(PagePrivate(page)); 771da177e4SLinus Torvalds __remove_from_page_cache(page); 781da177e4SLinus Torvalds write_unlock_irq(&mapping->tree_lock); 791da177e4SLinus Torvalds ClearPageUptodate(page); 801da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 811da177e4SLinus Torvalds return 1; 821da177e4SLinus Torvalds } 831da177e4SLinus Torvalds 841da177e4SLinus Torvalds /** 851da177e4SLinus Torvalds * truncate_inode_pages - truncate *all* the pages from an offset 861da177e4SLinus Torvalds * @mapping: mapping to truncate 871da177e4SLinus Torvalds * @lstart: offset from which to truncate 881da177e4SLinus Torvalds * 891da177e4SLinus Torvalds * Truncate the page cache at a set offset, removing the pages that are beyond 901da177e4SLinus Torvalds * that offset (and zeroing out partial pages). 911da177e4SLinus Torvalds * 921da177e4SLinus Torvalds * Truncate takes two passes - the first pass is nonblocking. It will not 931da177e4SLinus Torvalds * block on page locks and it will not block on writeback. The second pass 941da177e4SLinus Torvalds * will wait. This is to prevent as much IO as possible in the affected region. 951da177e4SLinus Torvalds * The first pass will remove most pages, so the search cost of the second pass 961da177e4SLinus Torvalds * is low. 971da177e4SLinus Torvalds * 981da177e4SLinus Torvalds * When looking at page->index outside the page lock we need to be careful to 991da177e4SLinus Torvalds * copy it into a local to avoid races (it could change at any time). 1001da177e4SLinus Torvalds * 1011da177e4SLinus Torvalds * We pass down the cache-hot hint to the page freeing code. Even if the 1021da177e4SLinus Torvalds * mapping is large, it is probably the case that the final pages are the most 1031da177e4SLinus Torvalds * recently touched, and freeing happens in ascending file offset order. 1041da177e4SLinus Torvalds * 1051da177e4SLinus Torvalds * Called under (and serialised by) inode->i_sem. 1061da177e4SLinus Torvalds */ 1071da177e4SLinus Torvalds void truncate_inode_pages(struct address_space *mapping, loff_t lstart) 1081da177e4SLinus Torvalds { 1091da177e4SLinus Torvalds const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 1101da177e4SLinus Torvalds const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 1111da177e4SLinus Torvalds struct pagevec pvec; 1121da177e4SLinus Torvalds pgoff_t next; 1131da177e4SLinus Torvalds int i; 1141da177e4SLinus Torvalds 1151da177e4SLinus Torvalds if (mapping->nrpages == 0) 1161da177e4SLinus Torvalds return; 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds pagevec_init(&pvec, 0); 1191da177e4SLinus Torvalds next = start; 1201da177e4SLinus Torvalds while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1211da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1221da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1231da177e4SLinus Torvalds pgoff_t page_index = page->index; 1241da177e4SLinus Torvalds 1251da177e4SLinus Torvalds if (page_index > next) 1261da177e4SLinus Torvalds next = page_index; 1271da177e4SLinus Torvalds next++; 1281da177e4SLinus Torvalds if (TestSetPageLocked(page)) 1291da177e4SLinus Torvalds continue; 1301da177e4SLinus Torvalds if (PageWriteback(page)) { 1311da177e4SLinus Torvalds unlock_page(page); 1321da177e4SLinus Torvalds continue; 1331da177e4SLinus Torvalds } 1341da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1351da177e4SLinus Torvalds unlock_page(page); 1361da177e4SLinus Torvalds } 1371da177e4SLinus Torvalds pagevec_release(&pvec); 1381da177e4SLinus Torvalds cond_resched(); 1391da177e4SLinus Torvalds } 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds if (partial) { 1421da177e4SLinus Torvalds struct page *page = find_lock_page(mapping, start - 1); 1431da177e4SLinus Torvalds if (page) { 1441da177e4SLinus Torvalds wait_on_page_writeback(page); 1451da177e4SLinus Torvalds truncate_partial_page(page, partial); 1461da177e4SLinus Torvalds unlock_page(page); 1471da177e4SLinus Torvalds page_cache_release(page); 1481da177e4SLinus Torvalds } 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds 1511da177e4SLinus Torvalds next = start; 1521da177e4SLinus Torvalds for ( ; ; ) { 1531da177e4SLinus Torvalds cond_resched(); 1541da177e4SLinus Torvalds if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1551da177e4SLinus Torvalds if (next == start) 1561da177e4SLinus Torvalds break; 1571da177e4SLinus Torvalds next = start; 1581da177e4SLinus Torvalds continue; 1591da177e4SLinus Torvalds } 1601da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1611da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1621da177e4SLinus Torvalds 1631da177e4SLinus Torvalds lock_page(page); 1641da177e4SLinus Torvalds wait_on_page_writeback(page); 1651da177e4SLinus Torvalds if (page->index > next) 1661da177e4SLinus Torvalds next = page->index; 1671da177e4SLinus Torvalds next++; 1681da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1691da177e4SLinus Torvalds unlock_page(page); 1701da177e4SLinus Torvalds } 1711da177e4SLinus Torvalds pagevec_release(&pvec); 1721da177e4SLinus Torvalds } 1731da177e4SLinus Torvalds } 1741da177e4SLinus Torvalds 1751da177e4SLinus Torvalds EXPORT_SYMBOL(truncate_inode_pages); 1761da177e4SLinus Torvalds 1771da177e4SLinus Torvalds /** 1781da177e4SLinus Torvalds * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode 1791da177e4SLinus Torvalds * @mapping: the address_space which holds the pages to invalidate 1801da177e4SLinus Torvalds * @start: the offset 'from' which to invalidate 1811da177e4SLinus Torvalds * @end: the offset 'to' which to invalidate (inclusive) 1821da177e4SLinus Torvalds * 1831da177e4SLinus Torvalds * This function only removes the unlocked pages, if you want to 1841da177e4SLinus Torvalds * remove all the pages of one inode, you must call truncate_inode_pages. 1851da177e4SLinus Torvalds * 1861da177e4SLinus Torvalds * invalidate_mapping_pages() will not block on IO activity. It will not 1871da177e4SLinus Torvalds * invalidate pages which are dirty, locked, under writeback or mapped into 1881da177e4SLinus Torvalds * pagetables. 1891da177e4SLinus Torvalds */ 1901da177e4SLinus Torvalds unsigned long invalidate_mapping_pages(struct address_space *mapping, 1911da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 1921da177e4SLinus Torvalds { 1931da177e4SLinus Torvalds struct pagevec pvec; 1941da177e4SLinus Torvalds pgoff_t next = start; 1951da177e4SLinus Torvalds unsigned long ret = 0; 1961da177e4SLinus Torvalds int i; 1971da177e4SLinus Torvalds 1981da177e4SLinus Torvalds pagevec_init(&pvec, 0); 1991da177e4SLinus Torvalds while (next <= end && 2001da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 2011da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2021da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2031da177e4SLinus Torvalds 2041da177e4SLinus Torvalds if (TestSetPageLocked(page)) { 2051da177e4SLinus Torvalds next++; 2061da177e4SLinus Torvalds continue; 2071da177e4SLinus Torvalds } 2081da177e4SLinus Torvalds if (page->index > next) 2091da177e4SLinus Torvalds next = page->index; 2101da177e4SLinus Torvalds next++; 2111da177e4SLinus Torvalds if (PageDirty(page) || PageWriteback(page)) 2121da177e4SLinus Torvalds goto unlock; 2131da177e4SLinus Torvalds if (page_mapped(page)) 2141da177e4SLinus Torvalds goto unlock; 2151da177e4SLinus Torvalds ret += invalidate_complete_page(mapping, page); 2161da177e4SLinus Torvalds unlock: 2171da177e4SLinus Torvalds unlock_page(page); 2181da177e4SLinus Torvalds if (next > end) 2191da177e4SLinus Torvalds break; 2201da177e4SLinus Torvalds } 2211da177e4SLinus Torvalds pagevec_release(&pvec); 2221da177e4SLinus Torvalds cond_resched(); 2231da177e4SLinus Torvalds } 2241da177e4SLinus Torvalds return ret; 2251da177e4SLinus Torvalds } 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds unsigned long invalidate_inode_pages(struct address_space *mapping) 2281da177e4SLinus Torvalds { 2291da177e4SLinus Torvalds return invalidate_mapping_pages(mapping, 0, ~0UL); 2301da177e4SLinus Torvalds } 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds EXPORT_SYMBOL(invalidate_inode_pages); 2331da177e4SLinus Torvalds 2341da177e4SLinus Torvalds /** 2351da177e4SLinus Torvalds * invalidate_inode_pages2_range - remove range of pages from an address_space 23667be2dd1SMartin Waitz * @mapping: the address_space 2371da177e4SLinus Torvalds * @start: the page offset 'from' which to invalidate 2381da177e4SLinus Torvalds * @end: the page offset 'to' which to invalidate (inclusive) 2391da177e4SLinus Torvalds * 2401da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 2411da177e4SLinus Torvalds * invalidation. 2421da177e4SLinus Torvalds * 2431da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 2441da177e4SLinus Torvalds */ 2451da177e4SLinus Torvalds int invalidate_inode_pages2_range(struct address_space *mapping, 2461da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 2471da177e4SLinus Torvalds { 2481da177e4SLinus Torvalds struct pagevec pvec; 2491da177e4SLinus Torvalds pgoff_t next; 2501da177e4SLinus Torvalds int i; 2511da177e4SLinus Torvalds int ret = 0; 2521da177e4SLinus Torvalds int did_range_unmap = 0; 2531da177e4SLinus Torvalds int wrapped = 0; 2541da177e4SLinus Torvalds 2551da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2561da177e4SLinus Torvalds next = start; 2571da177e4SLinus Torvalds while (next <= end && !ret && !wrapped && 2581da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, 2591da177e4SLinus Torvalds min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 2601da177e4SLinus Torvalds for (i = 0; !ret && i < pagevec_count(&pvec); i++) { 2611da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2621da177e4SLinus Torvalds pgoff_t page_index; 2631da177e4SLinus Torvalds int was_dirty; 2641da177e4SLinus Torvalds 2651da177e4SLinus Torvalds lock_page(page); 2661da177e4SLinus Torvalds if (page->mapping != mapping) { 2671da177e4SLinus Torvalds unlock_page(page); 2681da177e4SLinus Torvalds continue; 2691da177e4SLinus Torvalds } 2701da177e4SLinus Torvalds page_index = page->index; 2711da177e4SLinus Torvalds next = page_index + 1; 2721da177e4SLinus Torvalds if (next == 0) 2731da177e4SLinus Torvalds wrapped = 1; 2741da177e4SLinus Torvalds if (page_index > end) { 2751da177e4SLinus Torvalds unlock_page(page); 2761da177e4SLinus Torvalds break; 2771da177e4SLinus Torvalds } 2781da177e4SLinus Torvalds wait_on_page_writeback(page); 2791da177e4SLinus Torvalds while (page_mapped(page)) { 2801da177e4SLinus Torvalds if (!did_range_unmap) { 2811da177e4SLinus Torvalds /* 2821da177e4SLinus Torvalds * Zap the rest of the file in one hit. 2831da177e4SLinus Torvalds */ 2841da177e4SLinus Torvalds unmap_mapping_range(mapping, 2851da177e4SLinus Torvalds page_index << PAGE_CACHE_SHIFT, 2861da177e4SLinus Torvalds (end - page_index + 1) 2871da177e4SLinus Torvalds << PAGE_CACHE_SHIFT, 2881da177e4SLinus Torvalds 0); 2891da177e4SLinus Torvalds did_range_unmap = 1; 2901da177e4SLinus Torvalds } else { 2911da177e4SLinus Torvalds /* 2921da177e4SLinus Torvalds * Just zap this page 2931da177e4SLinus Torvalds */ 2941da177e4SLinus Torvalds unmap_mapping_range(mapping, 2951da177e4SLinus Torvalds page_index << PAGE_CACHE_SHIFT, 2961da177e4SLinus Torvalds PAGE_CACHE_SIZE, 0); 2971da177e4SLinus Torvalds } 2981da177e4SLinus Torvalds } 2991da177e4SLinus Torvalds was_dirty = test_clear_page_dirty(page); 3001da177e4SLinus Torvalds if (!invalidate_complete_page(mapping, page)) { 3011da177e4SLinus Torvalds if (was_dirty) 3021da177e4SLinus Torvalds set_page_dirty(page); 3031da177e4SLinus Torvalds ret = -EIO; 3041da177e4SLinus Torvalds } 3051da177e4SLinus Torvalds unlock_page(page); 3061da177e4SLinus Torvalds } 3071da177e4SLinus Torvalds pagevec_release(&pvec); 3081da177e4SLinus Torvalds cond_resched(); 3091da177e4SLinus Torvalds } 3101da177e4SLinus Torvalds return ret; 3111da177e4SLinus Torvalds } 3121da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); 3131da177e4SLinus Torvalds 3141da177e4SLinus Torvalds /** 3151da177e4SLinus Torvalds * invalidate_inode_pages2 - remove all pages from an address_space 31667be2dd1SMartin Waitz * @mapping: the address_space 3171da177e4SLinus Torvalds * 3181da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 3191da177e4SLinus Torvalds * invalidation. 3201da177e4SLinus Torvalds * 3211da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 3221da177e4SLinus Torvalds */ 3231da177e4SLinus Torvalds int invalidate_inode_pages2(struct address_space *mapping) 3241da177e4SLinus Torvalds { 3251da177e4SLinus Torvalds return invalidate_inode_pages2_range(mapping, 0, -1); 3261da177e4SLinus Torvalds } 3271da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 328