/*
 *  linux/mm/swap.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * This file contains the default values for the operation of the
 * Linux VM subsystem. Fine-tuning documentation can be found in
 * Documentation/sysctl/vm.txt.
 * Started 18.12.91
 * Swap aging added 23.2.95, Stephen Tweedie.
 * Buffermem limits added 12.3.98, Rik van Riel.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm_inline.h>
#include <linux/buffer_head.h> /* for try_to_release_page() */
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>

#include "internal.h"

/* How many pages do we try to swap or page in/out together? */
int page_cluster;

static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);

/*
 * This path almost never happens for VM activity - pages are normally
 * freed via pagevecs.  But it gets used by networking.
 */
static void __page_cache_release(struct page *page)
{
        if (PageLRU(page)) {
                unsigned long flags;
                struct zone *zone = page_zone(page);

                spin_lock_irqsave(&zone->lru_lock, flags);
                VM_BUG_ON(!PageLRU(page));
                __ClearPageLRU(page);
                del_page_from_lru(zone, page);
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        }
}

static void __put_single_page(struct page *page)
{
        __page_cache_release(page);
        free_hot_cold_page(page, 0);
}

static void __put_compound_page(struct page *page)
{
        compound_page_dtor *dtor;

        __page_cache_release(page);
        dtor = get_compound_page_dtor(page);
        (*dtor)(page);
}

static void put_compound_page(struct page *page)
{
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
                struct page *page_head = page->first_page;
                smp_rmb();
                /*
                 * If PageTail is still set after the smp_rmb() we can be sure
                 * that the page->first_page we read wasn't a dangling pointer.
                 * See __split_huge_page_refcount() smp_wmb().
                 */
                if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
                        unsigned long flags;
                        /*
                         * Verify that our page_head wasn't converted
                         * to a regular page before we got a
                         * reference on it.
                         */
                        if (unlikely(!PageHead(page_head))) {
                                /* PageHead is cleared after PageTail */
                                smp_rmb();
                                VM_BUG_ON(PageTail(page));
                                goto out_put_head;
                        }
                        /*
                         * Only run compound_lock on a valid PageHead,
                         * after having it pinned with
                         * get_page_unless_zero() above.
                         */
                        smp_mb();
                        /* page_head wasn't a dangling pointer */
                        flags = compound_lock_irqsave(page_head);
                        if (unlikely(!PageTail(page))) {
                                /* __split_huge_page_refcount run before us */
                                compound_unlock_irqrestore(page_head, flags);
                                VM_BUG_ON(PageHead(page_head));
                        out_put_head:
                                if (put_page_testzero(page_head))
                                        __put_single_page(page_head);
                        out_put_single:
                                if (put_page_testzero(page))
                                        __put_single_page(page);
                                return;
                        }
                        VM_BUG_ON(page_head != page->first_page);
                        /*
                         * We can release the refcount taken by
                         * get_page_unless_zero() now that
                         * __split_huge_page_refcount() is blocked on the
                         * compound_lock.
                         */
                        if (put_page_testzero(page_head))
                                VM_BUG_ON(1);
                        /* __split_huge_page_refcount will wait now */
                        VM_BUG_ON(atomic_read(&page->_count) <= 0);
                        atomic_dec(&page->_count);
                        VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
                        compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
                                        __put_compound_page(page_head);
                                else
                                        __put_single_page(page_head);
                        }
                } else {
                        /* page_head is a dangling pointer */
                        VM_BUG_ON(PageTail(page));
                        goto out_put_single;
                }
        } else if (put_page_testzero(page)) {
                if (PageHead(page))
                        __put_compound_page(page);
                else
                        __put_single_page(page);
        }
}

void put_page(struct page *page)
{
        if (unlikely(PageCompound(page)))
                put_compound_page(page);
        else if (put_page_testzero(page))
                __put_single_page(page);
}
EXPORT_SYMBOL(put_page);
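/*
 * Example (illustrative sketch, not code from this file): callers pair
 * put_page() with a previously taken reference, e.g. one from get_page()
 * or find_get_page():
 *
 *      page = find_get_page(mapping, index);
 *      if (page) {
 *              ... use the page ...
 *              put_page(page);         releases our ref; may free the page
 *      }
 */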
/**
 * put_pages_list() - release a list of pages
 * @pages: list of pages threaded on page->lru
 *
 * Release a list of pages which are strung together on page.lru.  Currently
 * used by read_cache_pages() and related error recovery code.
 */
void put_pages_list(struct list_head *pages)
{
        while (!list_empty(pages)) {
                struct page *victim;

                victim = list_entry(pages->prev, struct page, lru);
                list_del(&victim->lru);
                page_cache_release(victim);
        }
}
EXPORT_SYMBOL(put_pages_list);
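/*
 * Example (hypothetical caller, for illustration): thread pages onto a
 * local list through page->lru, then release them in one call:
 *
 *      LIST_HEAD(pages);
 *      ...
 *      list_add(&page->lru, &pages);   one reference held per page
 *      ...
 *      put_pages_list(&pages);         drops each ref and empties the list
 */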
/*
 * pagevec_move_tail() must be called with IRQ disabled.
 * Otherwise this may cause nasty races.
 */
static void pagevec_move_tail(struct pagevec *pvec)
{
        int i;
        int pgmoved = 0;
        struct zone *zone = NULL;

        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
                struct zone *pagezone = page_zone(page);

                if (pagezone != zone) {
                        if (zone)
                                spin_unlock(&zone->lru_lock);
                        zone = pagezone;
                        spin_lock(&zone->lru_lock);
                }
                if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
                        int lru = page_lru_base_type(page);
                        list_move_tail(&page->lru, &zone->lru[lru].list);
                        pgmoved++;
                }
        }
        if (zone)
                spin_unlock(&zone->lru_lock);
        __count_vm_events(PGROTATED, pgmoved);
        release_pages(pvec->pages, pvec->nr, pvec->cold);
        pagevec_reinit(pvec);
}

/*
 * Writeback is about to end against a page which has been marked for immediate
 * reclaim.  If it still appears to be reclaimable, move it to the tail of the
 * inactive list.
 */
void rotate_reclaimable_page(struct page *page)
{
        if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
            !PageUnevictable(page) && PageLRU(page)) {
                struct pagevec *pvec;
                unsigned long flags;

                page_cache_get(page);
                local_irq_save(flags);
                pvec = &__get_cpu_var(lru_rotate_pvecs);
                if (!pagevec_add(pvec, page))
                        pagevec_move_tail(pvec);
                local_irq_restore(flags);
        }
}

static void update_page_reclaim_stat(struct zone *zone, struct page *page,
                                     int file, int rotated)
{
        struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
        struct zone_reclaim_stat *memcg_reclaim_stat;

        memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);

        reclaim_stat->recent_scanned[file]++;
        if (rotated)
                reclaim_stat->recent_rotated[file]++;

        if (!memcg_reclaim_stat)
                return;

        memcg_reclaim_stat->recent_scanned[file]++;
        if (rotated)
                memcg_reclaim_stat->recent_rotated[file]++;
}

/*
 * FIXME: speed this up?
 */
void activate_page(struct page *page)
{
        struct zone *zone = page_zone(page);

        spin_lock_irq(&zone->lru_lock);
        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
                int file = page_is_file_cache(page);
                int lru = page_lru_base_type(page);
                del_page_from_lru_list(zone, page, lru);

                SetPageActive(page);
                lru += LRU_ACTIVE;
                add_page_to_lru_list(zone, page, lru);
                __count_vm_event(PGACTIVATE);

                update_page_reclaim_stat(zone, page, file, 1);
        }
        spin_unlock_irq(&zone->lru_lock);
}

/*
 * Mark a page as having seen activity.
 *
 * inactive,unreferenced        ->      inactive,referenced
 * inactive,referenced          ->      active,unreferenced
 * active,unreferenced          ->      active,referenced
 */
void mark_page_accessed(struct page *page)
{
        if (!PageActive(page) && !PageUnevictable(page) &&
                        PageReferenced(page) && PageLRU(page)) {
                activate_page(page);
                ClearPageReferenced(page);
        } else if (!PageReferenced(page)) {
                SetPageReferenced(page);
        }
}
EXPORT_SYMBOL(mark_page_accessed);
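/*
 * Example: a page-cache read path would typically call this once per
 * page it touches, so repeatedly used pages are promoted (a sketch of
 * an assumed caller, not code from this file):
 *
 *      page = find_get_page(mapping, index);
 *      if (page) {
 *              mark_page_accessed(page);
 *              ... copy data out ...
 *              page_cache_release(page);
 *      }
 */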
void __lru_cache_add(struct page *page, enum lru_list lru)
{
        struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];

        page_cache_get(page);
        if (!pagevec_add(pvec, page))
                ____pagevec_lru_add(pvec, lru);
        put_cpu_var(lru_add_pvecs);
}
EXPORT_SYMBOL(__lru_cache_add);

/**
 * lru_cache_add_lru - add a page to a page list
 * @page: the page to be added to the LRU.
 * @lru: the LRU list to which the page is added.
 */
void lru_cache_add_lru(struct page *page, enum lru_list lru)
{
        if (PageActive(page)) {
                VM_BUG_ON(PageUnevictable(page));
                ClearPageActive(page);
        } else if (PageUnevictable(page)) {
                VM_BUG_ON(PageActive(page));
                ClearPageUnevictable(page);
        }

        VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
        __lru_cache_add(page, lru);
}
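/*
 * The convenience helpers in <linux/swap.h> (e.g. lru_cache_add_anon())
 * wrap __lru_cache_add(); a caller that needs to pick the list
 * explicitly would do something like this (hedged sketch):
 *
 *      lru_cache_add_lru(page, LRU_INACTIVE_ANON);     anonymous page
 *      lru_cache_add_lru(page, LRU_INACTIVE_FILE);     page-cache page
 */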
/**
 * add_page_to_unevictable_list - add a page to the unevictable list
 * @page:  the page to be added to the unevictable list
 *
 * Add page directly to its zone's unevictable list.  To avoid races with
 * tasks that might be making the page evictable, through e.g. munlock,
 * munmap or exit, while it's not on the lru, we want to add the page
 * while it's locked or otherwise "invisible" to other tasks.  This is
 * difficult to do when using the pagevec cache, so bypass that.
 */
void add_page_to_unevictable_list(struct page *page)
{
        struct zone *zone = page_zone(page);

        spin_lock_irq(&zone->lru_lock);
        SetPageUnevictable(page);
        SetPageLRU(page);
        add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
        spin_unlock_irq(&zone->lru_lock);
}

/*
 * If the page cannot be invalidated, it is moved to the
 * inactive list to speed up its reclaim.  It is moved to the
 * head of the list, rather than the tail, to give the flusher
 * threads some time to write it out, as this is much more
 * effective than the single-page writeout from reclaim.
 */
static void lru_deactivate(struct page *page, struct zone *zone)
{
        int lru, file;

        if (!PageLRU(page) || !PageActive(page))
                return;

        /* Some processes are using the page */
        if (page_mapped(page))
                return;

        file = page_is_file_cache(page);
        lru = page_lru_base_type(page);
        del_page_from_lru_list(zone, page, lru + LRU_ACTIVE);
        ClearPageActive(page);
        ClearPageReferenced(page);
        add_page_to_lru_list(zone, page, lru);
        __count_vm_event(PGDEACTIVATE);

        update_page_reclaim_stat(zone, page, file, 0);
}

static void ____pagevec_lru_deactivate(struct pagevec *pvec)
{
        int i;
        struct zone *zone = NULL;

        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
                struct zone *pagezone = page_zone(page);

                if (pagezone != zone) {
                        if (zone)
                                spin_unlock_irq(&zone->lru_lock);
                        zone = pagezone;
                        spin_lock_irq(&zone->lru_lock);
                }
                lru_deactivate(page, zone);
        }
        if (zone)
                spin_unlock_irq(&zone->lru_lock);

        release_pages(pvec->pages, pvec->nr, pvec->cold);
        pagevec_reinit(pvec);
}

/*
 * Drain pages out of the cpu's pagevecs.
 * Either "cpu" is the current CPU, and preemption has already been
 * disabled; or "cpu" is being hot-unplugged, and is already dead.
 */
static void drain_cpu_pagevecs(int cpu)
{
        struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
        struct pagevec *pvec;
        int lru;

        for_each_lru(lru) {
                pvec = &pvecs[lru - LRU_BASE];
                if (pagevec_count(pvec))
                        ____pagevec_lru_add(pvec, lru);
        }

        pvec = &per_cpu(lru_rotate_pvecs, cpu);
        if (pagevec_count(pvec)) {
                unsigned long flags;

                /* No harm done if a racing interrupt already did this */
                local_irq_save(flags);
                pagevec_move_tail(pvec);
                local_irq_restore(flags);
        }

        pvec = &per_cpu(lru_deactivate_pvecs, cpu);
        if (pagevec_count(pvec))
                ____pagevec_lru_deactivate(pvec);
}

/**
 * deactivate_page - forcefully deactivate a page
 * @page: page to deactivate
 *
 * This function hints the VM that @page is a good reclaim candidate,
 * for example if its invalidation fails due to the page being dirty
 * or under writeback.
 */
void deactivate_page(struct page *page)
{
        if (likely(get_page_unless_zero(page))) {
                struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);

                if (!pagevec_add(pvec, page))
                        ____pagevec_lru_deactivate(pvec);
                put_cpu_var(lru_deactivate_pvecs);
        }
}
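/*
 * Example of the intended use (hedged sketch, not the exact caller):
 * invalidation code that finds it cannot drop a page - say, because the
 * page is dirty or under writeback - may hint at it instead:
 *
 *      if (!invalidate_inode_page(page))
 *              deactivate_page(page);
 */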
void lru_add_drain(void)
{
        drain_cpu_pagevecs(get_cpu());
        put_cpu();
}

static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
        lru_add_drain();
}

/*
 * Returns 0 for success
 */
int lru_add_drain_all(void)
{
        return schedule_on_each_cpu(lru_add_drain_per_cpu);
}

/*
 * Batched page_cache_release().  Decrement the reference count on all the
 * passed pages.  If it fell to zero then remove the page from the LRU and
 * free it.
 *
 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
 * for the remainder of the operation.
 *
 * The locking in this function is against shrink_inactive_list(): we recheck
 * the page count inside the lock to see whether shrink_inactive_list()
 * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
 * will free it.
 */
void release_pages(struct page **pages, int nr, int cold)
{
        int i;
        struct pagevec pages_to_free;
        struct zone *zone = NULL;
        unsigned long uninitialized_var(flags);

        pagevec_init(&pages_to_free, cold);
        for (i = 0; i < nr; i++) {
                struct page *page = pages[i];

                if (unlikely(PageCompound(page))) {
                        if (zone) {
                                spin_unlock_irqrestore(&zone->lru_lock, flags);
                                zone = NULL;
                        }
                        put_compound_page(page);
                        continue;
                }

                if (!put_page_testzero(page))
                        continue;

                if (PageLRU(page)) {
                        struct zone *pagezone = page_zone(page);

                        if (pagezone != zone) {
                                if (zone)
                                        spin_unlock_irqrestore(&zone->lru_lock,
                                                                        flags);
                                zone = pagezone;
                                spin_lock_irqsave(&zone->lru_lock, flags);
                        }
                        VM_BUG_ON(!PageLRU(page));
                        __ClearPageLRU(page);
                        del_page_from_lru(zone, page);
                }

                if (!pagevec_add(&pages_to_free, page)) {
                        if (zone) {
                                spin_unlock_irqrestore(&zone->lru_lock, flags);
                                zone = NULL;
                        }
                        __pagevec_free(&pages_to_free);
                        pagevec_reinit(&pages_to_free);
                }
        }
        if (zone)
                spin_unlock_irqrestore(&zone->lru_lock, flags);

        pagevec_free(&pages_to_free);
}
EXPORT_SYMBOL(release_pages);
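/*
 * Example (illustrative): drop the references on a whole array of pages
 * with one batched call instead of looping over put_page():
 *
 *      struct page *pages[NPAGES];     NPAGES is a hypothetical count
 *      ... each entry holds one reference ...
 *      release_pages(pages, NPAGES, 0);        cold == 0: cache-warm pages
 */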
/*
 * The pages which we're about to release may be in the deferred lru-addition
 * queues.  That would prevent them from really being freed right now.  That's
 * OK from a correctness point of view but is inefficient - those pages may be
 * cache-warm and we want to give them back to the page allocator ASAP.
 *
 * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
 * and __pagevec_lru_add_active() call release_pages() directly to avoid
 * mutual recursion.
 */
void __pagevec_release(struct pagevec *pvec)
{
        lru_add_drain();
        release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
        pagevec_reinit(pvec);
}
EXPORT_SYMBOL(__pagevec_release);

/* used by __split_huge_page_refcount() */
void lru_add_page_tail(struct zone *zone,
                       struct page *page, struct page *page_tail)
{
        int active;
        enum lru_list lru;
        const int file = 0;
        struct list_head *head;

        VM_BUG_ON(!PageHead(page));
        VM_BUG_ON(PageCompound(page_tail));
        VM_BUG_ON(PageLRU(page_tail));
        VM_BUG_ON(!spin_is_locked(&zone->lru_lock));

        SetPageLRU(page_tail);

        if (page_evictable(page_tail, NULL)) {
                if (PageActive(page)) {
                        SetPageActive(page_tail);
                        active = 1;
                        lru = LRU_ACTIVE_ANON;
                } else {
                        active = 0;
                        lru = LRU_INACTIVE_ANON;
                }
                update_page_reclaim_stat(zone, page_tail, file, active);
                if (likely(PageLRU(page)))
                        head = page->lru.prev;
                else
                        head = &zone->lru[lru].list;
                __add_page_to_lru_list(zone, page_tail, lru, head);
        } else {
                SetPageUnevictable(page_tail);
                add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
        }
}

/*
 * Add the passed pages to the LRU, then drop the caller's refcount
 * on them.  Reinitialises the caller's pagevec.
 */
void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
{
        int i;
        struct zone *zone = NULL;

        VM_BUG_ON(is_unevictable_lru(lru));

        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
                struct zone *pagezone = page_zone(page);
                int file;
                int active;

                if (pagezone != zone) {
                        if (zone)
                                spin_unlock_irq(&zone->lru_lock);
                        zone = pagezone;
                        spin_lock_irq(&zone->lru_lock);
                }
                VM_BUG_ON(PageActive(page));
                VM_BUG_ON(PageUnevictable(page));
                VM_BUG_ON(PageLRU(page));
                SetPageLRU(page);
                active = is_active_lru(lru);
                file = is_file_lru(lru);
                if (active)
                        SetPageActive(page);
                update_page_reclaim_stat(zone, page, file, active);
                add_page_to_lru_list(zone, page, lru);
        }
        if (zone)
                spin_unlock_irq(&zone->lru_lock);
        release_pages(pvec->pages, pvec->nr, pvec->cold);
        pagevec_reinit(pvec);
}
EXPORT_SYMBOL(____pagevec_lru_add);

/*
 * Try to drop buffers from the pages in a pagevec
 */
void pagevec_strip(struct pagevec *pvec)
{
        int i;

        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];

                if (page_has_private(page) && trylock_page(page)) {
                        if (page_has_private(page))
                                try_to_release_page(page, 0);
                        unlock_page(page);
                }
        }
}

/**
 * pagevec_lookup - gang pagecache lookup
 * @pvec:       Where the resulting pages are placed
 * @mapping:    The address_space to search
 * @start:      The starting page index
 * @nr_pages:   The maximum number of pages
 *
 * pagevec_lookup() will search for and return a group of up to @nr_pages pages
 * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
 * reference against the pages in @pvec.
 *
 * The search returns a group of mapping-contiguous pages with ascending
 * indexes.  There may be holes in the indices due to not-present pages.
 *
 * pagevec_lookup() returns the number of pages which were found.
 */
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
                pgoff_t start, unsigned nr_pages)
{
        pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
        return pagevec_count(pvec);
}
EXPORT_SYMBOL(pagevec_lookup);
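/*
 * Example: the canonical lookup loop (a sketch of the pattern used by
 * truncate/writeback-style scans):
 *
 *      struct pagevec pvec;
 *      pgoff_t index = 0;
 *
 *      pagevec_init(&pvec, 0);
 *      while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
 *              int i;
 *              for (i = 0; i < pagevec_count(&pvec); i++) {
 *                      struct page *page = pvec.pages[i];
 *                      index = page->index + 1;
 *                      ... process page ...
 *              }
 *              pagevec_release(&pvec);
 *      }
 */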
unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
                pgoff_t *index, int tag, unsigned nr_pages)
{
        pvec->nr = find_get_pages_tag(mapping, index, tag,
                                        nr_pages, pvec->pages);
        return pagevec_count(pvec);
}
EXPORT_SYMBOL(pagevec_lookup_tag);

/*
 * Perform any setup for the swap system
 */
void __init swap_setup(void)
{
        unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);

#ifdef CONFIG_SWAP
        bdi_init(swapper_space.backing_dev_info);
#endif

        /* Use a smaller cluster for small-memory machines */
        if (megs < 16)
                page_cluster = 2;
        else
                page_cluster = 3;
        /*
         * Right now other parts of the system mean that we
         * _really_ don't want to cluster much more.
         */
}