11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * mm/truncate.c - code for taking down pages from address_spaces 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2002, Linus Torvalds 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * 10Sep2002 akpm@zip.com.au 71da177e4SLinus Torvalds * Initial version. 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds 101da177e4SLinus Torvalds #include <linux/kernel.h> 111da177e4SLinus Torvalds #include <linux/mm.h> 12*0fd0e6b0SNick Piggin #include <linux/swap.h> 131da177e4SLinus Torvalds #include <linux/module.h> 141da177e4SLinus Torvalds #include <linux/pagemap.h> 151da177e4SLinus Torvalds #include <linux/pagevec.h> 161da177e4SLinus Torvalds #include <linux/buffer_head.h> /* grr. try_to_release_page, 17aaa4059bSJan Kara do_invalidatepage */ 181da177e4SLinus Torvalds 191da177e4SLinus Torvalds 201da177e4SLinus Torvalds static inline void truncate_partial_page(struct page *page, unsigned partial) 211da177e4SLinus Torvalds { 221da177e4SLinus Torvalds memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); 231da177e4SLinus Torvalds if (PagePrivate(page)) 241da177e4SLinus Torvalds do_invalidatepage(page, partial); 251da177e4SLinus Torvalds } 261da177e4SLinus Torvalds 271da177e4SLinus Torvalds /* 281da177e4SLinus Torvalds * If truncate cannot remove the fs-private metadata from the page, the page 291da177e4SLinus Torvalds * becomes anonymous. It will be left on the LRU and may even be mapped into 301da177e4SLinus Torvalds * user pagetables if we're racing with filemap_nopage(). 311da177e4SLinus Torvalds * 321da177e4SLinus Torvalds * We need to bale out if page->mapping is no longer equal to the original 331da177e4SLinus Torvalds * mapping. This happens a) when the VM reclaimed the page while we waited on 341da177e4SLinus Torvalds * its lock, b) when a concurrent invalidate_inode_pages got there first and 351da177e4SLinus Torvalds * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 361da177e4SLinus Torvalds */ 371da177e4SLinus Torvalds static void 381da177e4SLinus Torvalds truncate_complete_page(struct address_space *mapping, struct page *page) 391da177e4SLinus Torvalds { 401da177e4SLinus Torvalds if (page->mapping != mapping) 411da177e4SLinus Torvalds return; 421da177e4SLinus Torvalds 431da177e4SLinus Torvalds if (PagePrivate(page)) 441da177e4SLinus Torvalds do_invalidatepage(page, 0); 451da177e4SLinus Torvalds 461da177e4SLinus Torvalds clear_page_dirty(page); 471da177e4SLinus Torvalds ClearPageUptodate(page); 481da177e4SLinus Torvalds ClearPageMappedToDisk(page); 491da177e4SLinus Torvalds remove_from_page_cache(page); 501da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 511da177e4SLinus Torvalds } 521da177e4SLinus Torvalds 531da177e4SLinus Torvalds /* 541da177e4SLinus Torvalds * This is for invalidate_inode_pages(). That function can be called at 551da177e4SLinus Torvalds * any time, and is not supposed to throw away dirty pages. But pages can 56*0fd0e6b0SNick Piggin * be marked dirty at any time too, so use remove_mapping which safely 57*0fd0e6b0SNick Piggin * discards clean, unused pages. 581da177e4SLinus Torvalds * 591da177e4SLinus Torvalds * Returns non-zero if the page was successfully invalidated. 601da177e4SLinus Torvalds */ 611da177e4SLinus Torvalds static int 621da177e4SLinus Torvalds invalidate_complete_page(struct address_space *mapping, struct page *page) 631da177e4SLinus Torvalds { 64*0fd0e6b0SNick Piggin int ret; 65*0fd0e6b0SNick Piggin 661da177e4SLinus Torvalds if (page->mapping != mapping) 671da177e4SLinus Torvalds return 0; 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds if (PagePrivate(page) && !try_to_release_page(page, 0)) 701da177e4SLinus Torvalds return 0; 711da177e4SLinus Torvalds 72*0fd0e6b0SNick Piggin ret = remove_mapping(mapping, page); 731da177e4SLinus Torvalds ClearPageUptodate(page); 74*0fd0e6b0SNick Piggin 75*0fd0e6b0SNick Piggin return ret; 761da177e4SLinus Torvalds } 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds /** 79d7339071SHans Reiser * truncate_inode_pages - truncate range of pages specified by start and 80d7339071SHans Reiser * end byte offsets 811da177e4SLinus Torvalds * @mapping: mapping to truncate 821da177e4SLinus Torvalds * @lstart: offset from which to truncate 83d7339071SHans Reiser * @lend: offset to which to truncate 841da177e4SLinus Torvalds * 85d7339071SHans Reiser * Truncate the page cache, removing the pages that are between 86d7339071SHans Reiser * specified offsets (and zeroing out partial page 87d7339071SHans Reiser * (if lstart is not page aligned)). 881da177e4SLinus Torvalds * 891da177e4SLinus Torvalds * Truncate takes two passes - the first pass is nonblocking. It will not 901da177e4SLinus Torvalds * block on page locks and it will not block on writeback. The second pass 911da177e4SLinus Torvalds * will wait. This is to prevent as much IO as possible in the affected region. 921da177e4SLinus Torvalds * The first pass will remove most pages, so the search cost of the second pass 931da177e4SLinus Torvalds * is low. 941da177e4SLinus Torvalds * 951da177e4SLinus Torvalds * When looking at page->index outside the page lock we need to be careful to 961da177e4SLinus Torvalds * copy it into a local to avoid races (it could change at any time). 971da177e4SLinus Torvalds * 981da177e4SLinus Torvalds * We pass down the cache-hot hint to the page freeing code. Even if the 991da177e4SLinus Torvalds * mapping is large, it is probably the case that the final pages are the most 1001da177e4SLinus Torvalds * recently touched, and freeing happens in ascending file offset order. 1011da177e4SLinus Torvalds */ 102d7339071SHans Reiser void truncate_inode_pages_range(struct address_space *mapping, 103d7339071SHans Reiser loff_t lstart, loff_t lend) 1041da177e4SLinus Torvalds { 1051da177e4SLinus Torvalds const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 106d7339071SHans Reiser pgoff_t end; 1071da177e4SLinus Torvalds const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 1081da177e4SLinus Torvalds struct pagevec pvec; 1091da177e4SLinus Torvalds pgoff_t next; 1101da177e4SLinus Torvalds int i; 1111da177e4SLinus Torvalds 1121da177e4SLinus Torvalds if (mapping->nrpages == 0) 1131da177e4SLinus Torvalds return; 1141da177e4SLinus Torvalds 115d7339071SHans Reiser BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); 116d7339071SHans Reiser end = (lend >> PAGE_CACHE_SHIFT); 117d7339071SHans Reiser 1181da177e4SLinus Torvalds pagevec_init(&pvec, 0); 1191da177e4SLinus Torvalds next = start; 120d7339071SHans Reiser while (next <= end && 121d7339071SHans Reiser pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1221da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1231da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1241da177e4SLinus Torvalds pgoff_t page_index = page->index; 1251da177e4SLinus Torvalds 126d7339071SHans Reiser if (page_index > end) { 127d7339071SHans Reiser next = page_index; 128d7339071SHans Reiser break; 129d7339071SHans Reiser } 130d7339071SHans Reiser 1311da177e4SLinus Torvalds if (page_index > next) 1321da177e4SLinus Torvalds next = page_index; 1331da177e4SLinus Torvalds next++; 1341da177e4SLinus Torvalds if (TestSetPageLocked(page)) 1351da177e4SLinus Torvalds continue; 1361da177e4SLinus Torvalds if (PageWriteback(page)) { 1371da177e4SLinus Torvalds unlock_page(page); 1381da177e4SLinus Torvalds continue; 1391da177e4SLinus Torvalds } 1401da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1411da177e4SLinus Torvalds unlock_page(page); 1421da177e4SLinus Torvalds } 1431da177e4SLinus Torvalds pagevec_release(&pvec); 1441da177e4SLinus Torvalds cond_resched(); 1451da177e4SLinus Torvalds } 1461da177e4SLinus Torvalds 1471da177e4SLinus Torvalds if (partial) { 1481da177e4SLinus Torvalds struct page *page = find_lock_page(mapping, start - 1); 1491da177e4SLinus Torvalds if (page) { 1501da177e4SLinus Torvalds wait_on_page_writeback(page); 1511da177e4SLinus Torvalds truncate_partial_page(page, partial); 1521da177e4SLinus Torvalds unlock_page(page); 1531da177e4SLinus Torvalds page_cache_release(page); 1541da177e4SLinus Torvalds } 1551da177e4SLinus Torvalds } 1561da177e4SLinus Torvalds 1571da177e4SLinus Torvalds next = start; 1581da177e4SLinus Torvalds for ( ; ; ) { 1591da177e4SLinus Torvalds cond_resched(); 1601da177e4SLinus Torvalds if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1611da177e4SLinus Torvalds if (next == start) 1621da177e4SLinus Torvalds break; 1631da177e4SLinus Torvalds next = start; 1641da177e4SLinus Torvalds continue; 1651da177e4SLinus Torvalds } 166d7339071SHans Reiser if (pvec.pages[0]->index > end) { 167d7339071SHans Reiser pagevec_release(&pvec); 168d7339071SHans Reiser break; 169d7339071SHans Reiser } 1701da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1711da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1721da177e4SLinus Torvalds 173d7339071SHans Reiser if (page->index > end) 174d7339071SHans Reiser break; 1751da177e4SLinus Torvalds lock_page(page); 1761da177e4SLinus Torvalds wait_on_page_writeback(page); 1771da177e4SLinus Torvalds if (page->index > next) 1781da177e4SLinus Torvalds next = page->index; 1791da177e4SLinus Torvalds next++; 1801da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1811da177e4SLinus Torvalds unlock_page(page); 1821da177e4SLinus Torvalds } 1831da177e4SLinus Torvalds pagevec_release(&pvec); 1841da177e4SLinus Torvalds } 1851da177e4SLinus Torvalds } 186d7339071SHans Reiser EXPORT_SYMBOL(truncate_inode_pages_range); 1871da177e4SLinus Torvalds 188d7339071SHans Reiser /** 189d7339071SHans Reiser * truncate_inode_pages - truncate *all* the pages from an offset 190d7339071SHans Reiser * @mapping: mapping to truncate 191d7339071SHans Reiser * @lstart: offset from which to truncate 192d7339071SHans Reiser * 1931b1dcc1bSJes Sorensen * Called under (and serialised by) inode->i_mutex. 194d7339071SHans Reiser */ 195d7339071SHans Reiser void truncate_inode_pages(struct address_space *mapping, loff_t lstart) 196d7339071SHans Reiser { 197d7339071SHans Reiser truncate_inode_pages_range(mapping, lstart, (loff_t)-1); 198d7339071SHans Reiser } 1991da177e4SLinus Torvalds EXPORT_SYMBOL(truncate_inode_pages); 2001da177e4SLinus Torvalds 2011da177e4SLinus Torvalds /** 2021da177e4SLinus Torvalds * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode 2031da177e4SLinus Torvalds * @mapping: the address_space which holds the pages to invalidate 2041da177e4SLinus Torvalds * @start: the offset 'from' which to invalidate 2051da177e4SLinus Torvalds * @end: the offset 'to' which to invalidate (inclusive) 2061da177e4SLinus Torvalds * 2071da177e4SLinus Torvalds * This function only removes the unlocked pages, if you want to 2081da177e4SLinus Torvalds * remove all the pages of one inode, you must call truncate_inode_pages. 2091da177e4SLinus Torvalds * 2101da177e4SLinus Torvalds * invalidate_mapping_pages() will not block on IO activity. It will not 2111da177e4SLinus Torvalds * invalidate pages which are dirty, locked, under writeback or mapped into 2121da177e4SLinus Torvalds * pagetables. 2131da177e4SLinus Torvalds */ 2141da177e4SLinus Torvalds unsigned long invalidate_mapping_pages(struct address_space *mapping, 2151da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 2161da177e4SLinus Torvalds { 2171da177e4SLinus Torvalds struct pagevec pvec; 2181da177e4SLinus Torvalds pgoff_t next = start; 2191da177e4SLinus Torvalds unsigned long ret = 0; 2201da177e4SLinus Torvalds int i; 2211da177e4SLinus Torvalds 2221da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2231da177e4SLinus Torvalds while (next <= end && 2241da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 2251da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2261da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 227e0f23603SNeilBrown pgoff_t index; 228e0f23603SNeilBrown int lock_failed; 2291da177e4SLinus Torvalds 230e0f23603SNeilBrown lock_failed = TestSetPageLocked(page); 231e0f23603SNeilBrown 232e0f23603SNeilBrown /* 233e0f23603SNeilBrown * We really shouldn't be looking at the ->index of an 234e0f23603SNeilBrown * unlocked page. But we're not allowed to lock these 235e0f23603SNeilBrown * pages. So we rely upon nobody altering the ->index 236e0f23603SNeilBrown * of this (pinned-by-us) page. 237e0f23603SNeilBrown */ 238e0f23603SNeilBrown index = page->index; 239e0f23603SNeilBrown if (index > next) 240e0f23603SNeilBrown next = index; 2411da177e4SLinus Torvalds next++; 242e0f23603SNeilBrown if (lock_failed) 2431da177e4SLinus Torvalds continue; 244e0f23603SNeilBrown 2451da177e4SLinus Torvalds if (PageDirty(page) || PageWriteback(page)) 2461da177e4SLinus Torvalds goto unlock; 2471da177e4SLinus Torvalds if (page_mapped(page)) 2481da177e4SLinus Torvalds goto unlock; 2491da177e4SLinus Torvalds ret += invalidate_complete_page(mapping, page); 2501da177e4SLinus Torvalds unlock: 2511da177e4SLinus Torvalds unlock_page(page); 2521da177e4SLinus Torvalds if (next > end) 2531da177e4SLinus Torvalds break; 2541da177e4SLinus Torvalds } 2551da177e4SLinus Torvalds pagevec_release(&pvec); 2561da177e4SLinus Torvalds } 2571da177e4SLinus Torvalds return ret; 2581da177e4SLinus Torvalds } 2591da177e4SLinus Torvalds 2601da177e4SLinus Torvalds unsigned long invalidate_inode_pages(struct address_space *mapping) 2611da177e4SLinus Torvalds { 2621da177e4SLinus Torvalds return invalidate_mapping_pages(mapping, 0, ~0UL); 2631da177e4SLinus Torvalds } 2641da177e4SLinus Torvalds 2651da177e4SLinus Torvalds EXPORT_SYMBOL(invalidate_inode_pages); 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds /** 2681da177e4SLinus Torvalds * invalidate_inode_pages2_range - remove range of pages from an address_space 26967be2dd1SMartin Waitz * @mapping: the address_space 2701da177e4SLinus Torvalds * @start: the page offset 'from' which to invalidate 2711da177e4SLinus Torvalds * @end: the page offset 'to' which to invalidate (inclusive) 2721da177e4SLinus Torvalds * 2731da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 2741da177e4SLinus Torvalds * invalidation. 2751da177e4SLinus Torvalds * 2761da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 2771da177e4SLinus Torvalds */ 2781da177e4SLinus Torvalds int invalidate_inode_pages2_range(struct address_space *mapping, 2791da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 2801da177e4SLinus Torvalds { 2811da177e4SLinus Torvalds struct pagevec pvec; 2821da177e4SLinus Torvalds pgoff_t next; 2831da177e4SLinus Torvalds int i; 2841da177e4SLinus Torvalds int ret = 0; 2851da177e4SLinus Torvalds int did_range_unmap = 0; 2861da177e4SLinus Torvalds int wrapped = 0; 2871da177e4SLinus Torvalds 2881da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2891da177e4SLinus Torvalds next = start; 2901da177e4SLinus Torvalds while (next <= end && !ret && !wrapped && 2911da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, 2921da177e4SLinus Torvalds min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 2931da177e4SLinus Torvalds for (i = 0; !ret && i < pagevec_count(&pvec); i++) { 2941da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2951da177e4SLinus Torvalds pgoff_t page_index; 2961da177e4SLinus Torvalds int was_dirty; 2971da177e4SLinus Torvalds 2981da177e4SLinus Torvalds lock_page(page); 2991da177e4SLinus Torvalds if (page->mapping != mapping) { 3001da177e4SLinus Torvalds unlock_page(page); 3011da177e4SLinus Torvalds continue; 3021da177e4SLinus Torvalds } 3031da177e4SLinus Torvalds page_index = page->index; 3041da177e4SLinus Torvalds next = page_index + 1; 3051da177e4SLinus Torvalds if (next == 0) 3061da177e4SLinus Torvalds wrapped = 1; 3071da177e4SLinus Torvalds if (page_index > end) { 3081da177e4SLinus Torvalds unlock_page(page); 3091da177e4SLinus Torvalds break; 3101da177e4SLinus Torvalds } 3111da177e4SLinus Torvalds wait_on_page_writeback(page); 3121da177e4SLinus Torvalds while (page_mapped(page)) { 3131da177e4SLinus Torvalds if (!did_range_unmap) { 3141da177e4SLinus Torvalds /* 3151da177e4SLinus Torvalds * Zap the rest of the file in one hit. 3161da177e4SLinus Torvalds */ 3171da177e4SLinus Torvalds unmap_mapping_range(mapping, 318479ef592SOleg Drokin (loff_t)page_index<<PAGE_CACHE_SHIFT, 319479ef592SOleg Drokin (loff_t)(end - page_index + 1) 3201da177e4SLinus Torvalds << PAGE_CACHE_SHIFT, 3211da177e4SLinus Torvalds 0); 3221da177e4SLinus Torvalds did_range_unmap = 1; 3231da177e4SLinus Torvalds } else { 3241da177e4SLinus Torvalds /* 3251da177e4SLinus Torvalds * Just zap this page 3261da177e4SLinus Torvalds */ 3271da177e4SLinus Torvalds unmap_mapping_range(mapping, 328479ef592SOleg Drokin (loff_t)page_index<<PAGE_CACHE_SHIFT, 3291da177e4SLinus Torvalds PAGE_CACHE_SIZE, 0); 3301da177e4SLinus Torvalds } 3311da177e4SLinus Torvalds } 3321da177e4SLinus Torvalds was_dirty = test_clear_page_dirty(page); 3331da177e4SLinus Torvalds if (!invalidate_complete_page(mapping, page)) { 3341da177e4SLinus Torvalds if (was_dirty) 3351da177e4SLinus Torvalds set_page_dirty(page); 3361da177e4SLinus Torvalds ret = -EIO; 3371da177e4SLinus Torvalds } 3381da177e4SLinus Torvalds unlock_page(page); 3391da177e4SLinus Torvalds } 3401da177e4SLinus Torvalds pagevec_release(&pvec); 3411da177e4SLinus Torvalds cond_resched(); 3421da177e4SLinus Torvalds } 3431da177e4SLinus Torvalds return ret; 3441da177e4SLinus Torvalds } 3451da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); 3461da177e4SLinus Torvalds 3471da177e4SLinus Torvalds /** 3481da177e4SLinus Torvalds * invalidate_inode_pages2 - remove all pages from an address_space 34967be2dd1SMartin Waitz * @mapping: the address_space 3501da177e4SLinus Torvalds * 3511da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 3521da177e4SLinus Torvalds * invalidation. 3531da177e4SLinus Torvalds * 3541da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 3551da177e4SLinus Torvalds */ 3561da177e4SLinus Torvalds int invalidate_inode_pages2(struct address_space *mapping) 3571da177e4SLinus Torvalds { 3581da177e4SLinus Torvalds return invalidate_inode_pages2_range(mapping, 0, -1); 3591da177e4SLinus Torvalds } 3601da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 361