11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/swap.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 51da177e4SLinus Torvalds */ 61da177e4SLinus Torvalds 71da177e4SLinus Torvalds /* 8183ff22bSSimon Arlott * This file contains the default values for the operation of the 91da177e4SLinus Torvalds * Linux VM subsystem. Fine-tuning documentation can be found in 101da177e4SLinus Torvalds * Documentation/sysctl/vm.txt. 111da177e4SLinus Torvalds * Started 18.12.91 121da177e4SLinus Torvalds * Swap aging added 23.2.95, Stephen Tweedie. 131da177e4SLinus Torvalds * Buffermem limits added 12.3.98, Rik van Riel. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <linux/mm.h> 171da177e4SLinus Torvalds #include <linux/sched.h> 181da177e4SLinus Torvalds #include <linux/kernel_stat.h> 191da177e4SLinus Torvalds #include <linux/swap.h> 201da177e4SLinus Torvalds #include <linux/mman.h> 211da177e4SLinus Torvalds #include <linux/pagemap.h> 221da177e4SLinus Torvalds #include <linux/pagevec.h> 231da177e4SLinus Torvalds #include <linux/init.h> 241da177e4SLinus Torvalds #include <linux/module.h> 251da177e4SLinus Torvalds #include <linux/mm_inline.h> 261da177e4SLinus Torvalds #include <linux/buffer_head.h> /* for try_to_release_page() */ 271da177e4SLinus Torvalds #include <linux/percpu_counter.h> 281da177e4SLinus Torvalds #include <linux/percpu.h> 291da177e4SLinus Torvalds #include <linux/cpu.h> 301da177e4SLinus Torvalds #include <linux/notifier.h> 31e0bf68ddSPeter Zijlstra #include <linux/backing-dev.h> 3266e1707bSBalbir Singh #include <linux/memcontrol.h> 335a0e3ad6STejun Heo #include <linux/gfp.h> 341da177e4SLinus Torvalds 3564d6519dSLee Schermerhorn #include "internal.h" 3664d6519dSLee Schermerhorn 371da177e4SLinus Torvalds /* How many pages do we try to swap or page in/out together? 
 */
/* Initialised in swap_setup() below: 2 on small-memory machines, else 3. */
int page_cluster;

/* Per-CPU staging pagevecs for batched LRU addition and rotation. */
static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);

/*
 * This path almost never happens for VM activity - pages are normally
 * freed via pagevecs.  But it gets used by networking.
 */
static void __page_cache_release(struct page *page)
{
	if (PageLRU(page)) {
		unsigned long flags;
		struct zone *zone = page_zone(page);

		spin_lock_irqsave(&zone->lru_lock, flags);
		VM_BUG_ON(!PageLRU(page));
		__ClearPageLRU(page);
		del_page_from_lru(zone, page);
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	}
}

/*
 * Free a single (non-compound) page: pull it off the LRU if it is still
 * there, then hand it back to the page allocator.
 */
static void __put_single_page(struct page *page)
{
	__page_cache_release(page);
	free_hot_cold_page(page, 0);
}

/*
 * Free a compound page through its destructor (stored in the page by the
 * allocator), after detaching it from the LRU.
 */
static void __put_compound_page(struct page *page)
{
	compound_page_dtor *dtor;

	__page_cache_release(page);
	dtor = get_compound_page_dtor(page);
	(*dtor)(page);
}

/*
 * Drop a reference on a compound page.  Tail pages need special care:
 * __split_huge_page_refcount() may split the compound page from under
 * us, so page->first_page is validated (PageTail recheck plus the
 * compound lock) before the head's refcount is manipulated.
 */
static void put_compound_page(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/* __split_huge_page_refcount can run under us */
		struct page *page_head = page->first_page;
		smp_rmb();
		/*
		 * If PageTail is still set after smp_rmb() we can be sure
		 * that the page->first_page we read wasn't a dangling pointer.
		 * See __split_huge_page_refcount() smp_wmb().
		 */
		if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
			unsigned long flags;
			/*
			 * Verify that our page_head wasn't converted
			 * to a regular page before we got a
			 * reference on it.
			 */
			if (unlikely(!PageHead(page_head))) {
				/* PageHead is cleared after PageTail */
				smp_rmb();
				VM_BUG_ON(PageTail(page));
				goto out_put_head;
			}
			/*
			 * Only run compound_lock on a valid PageHead,
			 * after having it pinned with
			 * get_page_unless_zero() above.
			 */
			smp_mb();
			/* page_head wasn't a dangling pointer */
			flags = compound_lock_irqsave(page_head);
			if (unlikely(!PageTail(page))) {
				/* __split_huge_page_refcount run before us */
				compound_unlock_irqrestore(page_head, flags);
				VM_BUG_ON(PageHead(page_head));
out_put_head:
				if (put_page_testzero(page_head))
					__put_single_page(page_head);
out_put_single:
				if (put_page_testzero(page))
					__put_single_page(page);
				return;
			}
			VM_BUG_ON(page_head != page->first_page);
			/*
			 * We can release the refcount taken by
			 * get_page_unless_zero now that
			 * split_huge_page_refcount is blocked on the
			 * compound_lock.
			 */
			if (put_page_testzero(page_head))
				VM_BUG_ON(1);
			/* __split_huge_page_refcount will wait now */
			VM_BUG_ON(atomic_read(&page->_count) <= 0);
			atomic_dec(&page->_count);
			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
			compound_unlock_irqrestore(page_head, flags);
			if (put_page_testzero(page_head))
				__put_compound_page(page_head);
		} else {
			/* page_head is a dangling pointer */
			VM_BUG_ON(PageTail(page));
			goto out_put_single;
		}
	} else if (put_page_testzero(page)) {
		if (PageHead(page))
			__put_compound_page(page);
		else
			__put_single_page(page);
	}
}

/*
 * Release a reference on @page; when the count drops to zero the page is
 * freed via the compound destructor or the page allocator as appropriate.
 */
void put_page(struct page *page)
{
	if (unlikely(PageCompound(page)))
		put_compound_page(page);
	else if (put_page_testzero(page))
		__put_single_page(page);
}
EXPORT_SYMBOL(put_page);

/**
 * put_pages_list() - release a list of pages
 * @pages: list of pages threaded on page->lru
 *
 * Release a list of pages which are strung together on page.lru.
 Currently
 * used by read_cache_pages() and related error recovery code.
 */
void put_pages_list(struct list_head *pages)
{
	while (!list_empty(pages)) {
		struct page *victim;

		/* take pages from the tail so the list order is preserved */
		victim = list_entry(pages->prev, struct page, lru);
		list_del(&victim->lru);
		page_cache_release(victim);
	}
}
EXPORT_SYMBOL(put_pages_list);

/*
 * pagevec_move_tail() must be called with IRQ disabled.
 * Otherwise this may cause nasty races.
 *
 * Moves each still-inactive, evictable LRU page in @pvec to the tail of
 * its inactive list, batching zone->lru_lock across runs of pages that
 * belong to the same zone.
 */
static void pagevec_move_tail(struct pagevec *pvec)
{
	int i;
	int pgmoved = 0;
	struct zone *zone = NULL;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);

		/* only retake the lock when we cross into a new zone */
		if (pagezone != zone) {
			if (zone)
				spin_unlock(&zone->lru_lock);
			zone = pagezone;
			spin_lock(&zone->lru_lock);
		}
		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
			int lru = page_lru_base_type(page);
			list_move_tail(&page->lru, &zone->lru[lru].list);
			pgmoved++;
		}
	}
	if (zone)
		spin_unlock(&zone->lru_lock);
	__count_vm_events(PGROTATED, pgmoved);
	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}

/*
 * Writeback is about to end against a page which has been marked for immediate
 * reclaim.  If it still appears to be reclaimable, move it to the tail of the
 * inactive list.
 */
void rotate_reclaimable_page(struct page *page)
{
	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
	    !PageUnevictable(page) && PageLRU(page)) {
		struct pagevec *pvec;
		unsigned long flags;

		/* pin the page while it sits in the per-cpu pagevec */
		page_cache_get(page);
		local_irq_save(flags);
		pvec = &__get_cpu_var(lru_rotate_pvecs);
		if (!pagevec_add(pvec, page))
			pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}
}

/*
 * Record one reclaim-scan event (and a rotation when @rotated) for @page
 * in the zone's statistics, and mirror it into the page's memory cgroup
 * statistics when the page belongs to a cgroup.
 */
static void update_page_reclaim_stat(struct zone *zone, struct page *page,
				     int file, int rotated)
{
	struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
	struct zone_reclaim_stat *memcg_reclaim_stat;

	memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);

	reclaim_stat->recent_scanned[file]++;
	if (rotated)
		reclaim_stat->recent_rotated[file]++;

	/* page not charged to a memcg: nothing more to account */
	if (!memcg_reclaim_stat)
		return;

	memcg_reclaim_stat->recent_scanned[file]++;
	if (rotated)
		memcg_reclaim_stat->recent_rotated[file]++;
}

/*
 * Move an inactive, evictable LRU page to the head of its active list.
 *
 * FIXME: speed this up?
 */
void activate_page(struct page *page)
{
	struct zone *zone = page_zone(page);

	spin_lock_irq(&zone->lru_lock);
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		int file = page_is_file_cache(page);
		int lru = page_lru_base_type(page);
		del_page_from_lru_list(zone, page, lru);

		SetPageActive(page);
		lru += LRU_ACTIVE;
		add_page_to_lru_list(zone, page, lru);
		__count_vm_event(PGACTIVATE);

		update_page_reclaim_stat(zone, page, file, 1);
	}
	spin_unlock_irq(&zone->lru_lock);
}

/*
 * Mark a page as having seen activity.
 *
 * inactive,unreferenced	->	inactive,referenced
 * inactive,referenced		->	active,unreferenced
 * active,unreferenced		->	active,referenced
 */
void mark_page_accessed(struct page *page)
{
	if (!PageActive(page) && !PageUnevictable(page) &&
			PageReferenced(page) && PageLRU(page)) {
		activate_page(page);
		ClearPageReferenced(page);
	} else if (!PageReferenced(page)) {
		SetPageReferenced(page);
	}
}

EXPORT_SYMBOL(mark_page_accessed);

/*
 * Stage @page on this CPU's pagevec for @lru, taking a page reference;
 * the pagevec is flushed to the zone LRU lists once it fills up.
 */
void __lru_cache_add(struct page *page, enum lru_list lru)
{
	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];

	page_cache_get(page);
	if (!pagevec_add(pvec, page))
		____pagevec_lru_add(pvec, lru);
	put_cpu_var(lru_add_pvecs);
}
EXPORT_SYMBOL(__lru_cache_add);

/**
 * lru_cache_add_lru - add a page to a page list
 * @page: the page to be added to the LRU.
 * @lru: the LRU list to which the page is added.
 */
void lru_cache_add_lru(struct page *page, enum lru_list lru)
{
	/* clear stale state flags so the page matches its target list */
	if (PageActive(page)) {
		VM_BUG_ON(PageUnevictable(page));
		ClearPageActive(page);
	} else if (PageUnevictable(page)) {
		VM_BUG_ON(PageActive(page));
		ClearPageUnevictable(page);
	}

	VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
	__lru_cache_add(page, lru);
}

/**
 * add_page_to_unevictable_list - add a page to the unevictable list
 * @page:  the page to be added to the unevictable list
 *
 * Add page directly to its zone's unevictable list.  To avoid races with
 * tasks that might be making the page evictable, through eg. munlock,
 * munmap or exit, while it's not on the lru, we want to add the page
 * while it's locked or otherwise "invisible" to other tasks.  This is
 * difficult to do when using the pagevec cache, so bypass that.
 */
void add_page_to_unevictable_list(struct page *page)
{
	struct zone *zone = page_zone(page);

	spin_lock_irq(&zone->lru_lock);
	SetPageUnevictable(page);
	SetPageLRU(page);
	add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
	spin_unlock_irq(&zone->lru_lock);
}

/*
 * Drain pages out of the cpu's pagevecs.
 * Either "cpu" is the current CPU, and preemption has already been
 * disabled; or "cpu" is being hot-unplugged, and is already dead.
 */
static void drain_cpu_pagevecs(int cpu)
{
	struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
	struct pagevec *pvec;
	int lru;

	/* flush every per-lru addition pagevec */
	for_each_lru(lru) {
		pvec = &pvecs[lru - LRU_BASE];
		if (pagevec_count(pvec))
			____pagevec_lru_add(pvec, lru);
	}

	pvec = &per_cpu(lru_rotate_pvecs, cpu);
	if (pagevec_count(pvec)) {
		unsigned long flags;

		/* No harm done if a racing interrupt already did this */
		local_irq_save(flags);
		pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}
}
/* Flush the current CPU's pagevecs to the LRU lists (disables preemption). */
void lru_add_drain(void)
{
	drain_cpu_pagevecs(get_cpu());
	put_cpu();
}

/* Per-CPU work callback for lru_add_drain_all(). */
static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
	lru_add_drain();
}

/*
 * Returns 0 for success
 */
int lru_add_drain_all(void)
{
	return schedule_on_each_cpu(lru_add_drain_per_cpu);
}

/*
 * Batched page_cache_release().  Decrement the reference count on all the
 * passed pages.  If it fell to zero then remove the page from the LRU and
 * free it.
 *
 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
 * for the remainder of the operation.
 *
 * The locking in this function is against shrink_inactive_list(): we recheck
 * the page count inside the lock to see whether shrink_inactive_list()
 * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
 * will free it.
 */
void release_pages(struct page **pages, int nr, int cold)
{
	int i;
	struct pagevec pages_to_free;
	struct zone *zone = NULL;
	unsigned long uninitialized_var(flags);

	pagevec_init(&pages_to_free, cold);
	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		/* compound pages are not batched: drop the lock and hand off */
		if (unlikely(PageCompound(page))) {
			if (zone) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				zone = NULL;
			}
			put_compound_page(page);
			continue;
		}

		/* references still held elsewhere: nothing to free */
		if (!put_page_testzero(page))
			continue;

		if (PageLRU(page)) {
			struct zone *pagezone = page_zone(page);

			/* lazily switch the lock only on a zone change */
			if (pagezone != zone) {
				if (zone)
					spin_unlock_irqrestore(&zone->lru_lock,
									flags);
				zone = pagezone;
				spin_lock_irqsave(&zone->lru_lock, flags);
			}
			VM_BUG_ON(!PageLRU(page));
			__ClearPageLRU(page);
			del_page_from_lru(zone, page);
		}

		/* free a full batch outside the zone lock */
		if (!pagevec_add(&pages_to_free, page)) {
			if (zone) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				zone = NULL;
			}
			__pagevec_free(&pages_to_free);
			pagevec_reinit(&pages_to_free);
		}
	}
	if (zone)
		spin_unlock_irqrestore(&zone->lru_lock, flags);

	pagevec_free(&pages_to_free);
}
EXPORT_SYMBOL(release_pages);

/*
 * The pages which we're about to release may be in the deferred lru-addition
 * queues.  That would prevent them from really being freed right now.  That's
 * OK from a correctness point of view but is inefficient - those pages may be
 * cache-warm and we want to give them back to the page allocator ASAP.
 *
 * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
 * and __pagevec_lru_add_active() call release_pages() directly to avoid
 * mutual recursion.
 */
void __pagevec_release(struct pagevec *pvec)
{
	lru_add_drain();
	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
	pagevec_reinit(pvec);
}

EXPORT_SYMBOL(__pagevec_release);

/*
 * Add the passed pages to the LRU, then drop the caller's refcount
 * on them.  Reinitialises the caller's pagevec.
 */
void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
{
	int i;
	struct zone *zone = NULL;

	VM_BUG_ON(is_unevictable_lru(lru));

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);
		int file;
		int active;

		/* batch zone->lru_lock across same-zone runs of pages */
		if (pagezone != zone) {
			if (zone)
				spin_unlock_irq(&zone->lru_lock);
			zone = pagezone;
			spin_lock_irq(&zone->lru_lock);
		}
		VM_BUG_ON(PageActive(page));
		VM_BUG_ON(PageUnevictable(page));
		VM_BUG_ON(PageLRU(page));
		SetPageLRU(page);
		active = is_active_lru(lru);
		file = is_file_lru(lru);
		if (active)
			SetPageActive(page);
		update_page_reclaim_stat(zone, page, file, active);
		add_page_to_lru_list(zone, page, lru);
	}
	if (zone)
		spin_unlock_irq(&zone->lru_lock);
	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}

EXPORT_SYMBOL(____pagevec_lru_add);

/*
 * Try to drop buffers from the pages in a pagevec
 */
void pagevec_strip(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];

		/* recheck under the page lock: private data may be gone */
		if (page_has_private(page) && trylock_page(page)) {
			if (page_has_private(page))
				try_to_release_page(page, 0);
			unlock_page(page);
		}
	}
}

/**
 * pagevec_lookup - gang pagecache lookup
 * @pvec:	Where the resulting pages are placed
 * @mapping:	The address_space to search
 * @start:	The starting page index
 * @nr_pages:	The maximum number of pages
 *
 * pagevec_lookup() will search for and return a group of up to @nr_pages pages
 * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
 * reference against the pages in @pvec.
 *
 * The search returns a group of mapping-contiguous pages with ascending
 * indexes.  There may be holes in the indices due to not-present pages.
 *
 * pagevec_lookup() returns the number of pages which were found.
5531da177e4SLinus Torvalds */ 5541da177e4SLinus Torvalds unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, 5551da177e4SLinus Torvalds pgoff_t start, unsigned nr_pages) 5561da177e4SLinus Torvalds { 5571da177e4SLinus Torvalds pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages); 5581da177e4SLinus Torvalds return pagevec_count(pvec); 5591da177e4SLinus Torvalds } 5601da177e4SLinus Torvalds 56178539fdfSChristoph Hellwig EXPORT_SYMBOL(pagevec_lookup); 56278539fdfSChristoph Hellwig 5631da177e4SLinus Torvalds unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, 5641da177e4SLinus Torvalds pgoff_t *index, int tag, unsigned nr_pages) 5651da177e4SLinus Torvalds { 5661da177e4SLinus Torvalds pvec->nr = find_get_pages_tag(mapping, index, tag, 5671da177e4SLinus Torvalds nr_pages, pvec->pages); 5681da177e4SLinus Torvalds return pagevec_count(pvec); 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds 5717f285701SSteve French EXPORT_SYMBOL(pagevec_lookup_tag); 5721da177e4SLinus Torvalds 5731da177e4SLinus Torvalds /* 5741da177e4SLinus Torvalds * Perform any setup for the swap system 5751da177e4SLinus Torvalds */ 5761da177e4SLinus Torvalds void __init swap_setup(void) 5771da177e4SLinus Torvalds { 5784481374cSJan Beulich unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); 5791da177e4SLinus Torvalds 580e0bf68ddSPeter Zijlstra #ifdef CONFIG_SWAP 581e0bf68ddSPeter Zijlstra bdi_init(swapper_space.backing_dev_info); 582e0bf68ddSPeter Zijlstra #endif 583e0bf68ddSPeter Zijlstra 5841da177e4SLinus Torvalds /* Use a smaller cluster for small-memory machines */ 5851da177e4SLinus Torvalds if (megs < 16) 5861da177e4SLinus Torvalds page_cluster = 2; 5871da177e4SLinus Torvalds else 5881da177e4SLinus Torvalds page_cluster = 3; 5891da177e4SLinus Torvalds /* 5901da177e4SLinus Torvalds * Right now other parts of the system means that we 5911da177e4SLinus Torvalds * _really_ don't want to cluster much more 5921da177e4SLinus 
Torvalds */ 5931da177e4SLinus Torvalds } 594