11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/swap.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 51da177e4SLinus Torvalds */ 61da177e4SLinus Torvalds 71da177e4SLinus Torvalds /* 8183ff22bSSimon Arlott * This file contains the default values for the operation of the 91da177e4SLinus Torvalds * Linux VM subsystem. Fine-tuning documentation can be found in 101da177e4SLinus Torvalds * Documentation/sysctl/vm.txt. 111da177e4SLinus Torvalds * Started 18.12.91 121da177e4SLinus Torvalds * Swap aging added 23.2.95, Stephen Tweedie. 131da177e4SLinus Torvalds * Buffermem limits added 12.3.98, Rik van Riel. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <linux/mm.h> 171da177e4SLinus Torvalds #include <linux/sched.h> 181da177e4SLinus Torvalds #include <linux/kernel_stat.h> 191da177e4SLinus Torvalds #include <linux/swap.h> 201da177e4SLinus Torvalds #include <linux/mman.h> 211da177e4SLinus Torvalds #include <linux/pagemap.h> 221da177e4SLinus Torvalds #include <linux/pagevec.h> 231da177e4SLinus Torvalds #include <linux/init.h> 241da177e4SLinus Torvalds #include <linux/module.h> 251da177e4SLinus Torvalds #include <linux/mm_inline.h> 261da177e4SLinus Torvalds #include <linux/buffer_head.h> /* for try_to_release_page() */ 271da177e4SLinus Torvalds #include <linux/percpu_counter.h> 281da177e4SLinus Torvalds #include <linux/percpu.h> 291da177e4SLinus Torvalds #include <linux/cpu.h> 301da177e4SLinus Torvalds #include <linux/notifier.h> 31e0bf68ddSPeter Zijlstra #include <linux/backing-dev.h> 3266e1707bSBalbir Singh #include <linux/memcontrol.h> 331da177e4SLinus Torvalds 34*64d6519dSLee Schermerhorn #include "internal.h" 35*64d6519dSLee Schermerhorn 361da177e4SLinus Torvalds /* How many pages do we try to swap or page in/out together? */ 371da177e4SLinus Torvalds int page_cluster; 381da177e4SLinus Torvalds 39f04e9ebbSKOSAKI Motohiro static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); 40f84f9504SVegard Nossum static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); 41902aaed0SHisashi Hifumi 42b221385bSAdrian Bunk /* 43b221385bSAdrian Bunk * This path almost never happens for VM activity - pages are normally 44b221385bSAdrian Bunk * freed via pagevecs. But it gets used by networking. 45b221385bSAdrian Bunk */ 46920c7a5dSHarvey Harrison static void __page_cache_release(struct page *page) 47b221385bSAdrian Bunk { 48b221385bSAdrian Bunk if (PageLRU(page)) { 49b221385bSAdrian Bunk unsigned long flags; 50b221385bSAdrian Bunk struct zone *zone = page_zone(page); 51b221385bSAdrian Bunk 52b221385bSAdrian Bunk spin_lock_irqsave(&zone->lru_lock, flags); 53b221385bSAdrian Bunk VM_BUG_ON(!PageLRU(page)); 54b221385bSAdrian Bunk __ClearPageLRU(page); 55b221385bSAdrian Bunk del_page_from_lru(zone, page); 56b221385bSAdrian Bunk spin_unlock_irqrestore(&zone->lru_lock, flags); 57b221385bSAdrian Bunk } 58b221385bSAdrian Bunk free_hot_page(page); 59b221385bSAdrian Bunk } 60b221385bSAdrian Bunk 618519fb30SNick Piggin static void put_compound_page(struct page *page) 621da177e4SLinus Torvalds { 63d85f3385SChristoph Lameter page = compound_head(page); 641da177e4SLinus Torvalds if (put_page_testzero(page)) { 6533f2ef89SAndy Whitcroft compound_page_dtor *dtor; 661da177e4SLinus Torvalds 6733f2ef89SAndy Whitcroft dtor = get_compound_page_dtor(page); 681da177e4SLinus Torvalds (*dtor)(page); 691da177e4SLinus Torvalds } 701da177e4SLinus Torvalds } 718519fb30SNick Piggin 728519fb30SNick Piggin void put_page(struct page *page) 738519fb30SNick Piggin { 748519fb30SNick Piggin if (unlikely(PageCompound(page))) 758519fb30SNick Piggin put_compound_page(page); 768519fb30SNick Piggin else if (put_page_testzero(page)) 771da177e4SLinus Torvalds __page_cache_release(page); 781da177e4SLinus Torvalds } 791da177e4SLinus Torvalds EXPORT_SYMBOL(put_page); 801da177e4SLinus Torvalds 811d7ea732SAlexander Zarochentsev /** 827682486bSRandy Dunlap * put_pages_list() - release a list of pages 837682486bSRandy Dunlap * @pages: list of pages threaded on page->lru 841d7ea732SAlexander Zarochentsev * 851d7ea732SAlexander Zarochentsev * Release a list of pages which are strung together on page.lru. Currently 861d7ea732SAlexander Zarochentsev * used by read_cache_pages() and related error recovery code. 871d7ea732SAlexander Zarochentsev */ 881d7ea732SAlexander Zarochentsev void put_pages_list(struct list_head *pages) 891d7ea732SAlexander Zarochentsev { 901d7ea732SAlexander Zarochentsev while (!list_empty(pages)) { 911d7ea732SAlexander Zarochentsev struct page *victim; 921d7ea732SAlexander Zarochentsev 931d7ea732SAlexander Zarochentsev victim = list_entry(pages->prev, struct page, lru); 941d7ea732SAlexander Zarochentsev list_del(&victim->lru); 951d7ea732SAlexander Zarochentsev page_cache_release(victim); 961d7ea732SAlexander Zarochentsev } 971d7ea732SAlexander Zarochentsev } 981d7ea732SAlexander Zarochentsev EXPORT_SYMBOL(put_pages_list); 991d7ea732SAlexander Zarochentsev 1001da177e4SLinus Torvalds /* 101902aaed0SHisashi Hifumi * pagevec_move_tail() must be called with IRQ disabled. 102902aaed0SHisashi Hifumi * Otherwise this may cause nasty races. 103902aaed0SHisashi Hifumi */ 104902aaed0SHisashi Hifumi static void pagevec_move_tail(struct pagevec *pvec) 105902aaed0SHisashi Hifumi { 106902aaed0SHisashi Hifumi int i; 107902aaed0SHisashi Hifumi int pgmoved = 0; 108902aaed0SHisashi Hifumi struct zone *zone = NULL; 109902aaed0SHisashi Hifumi 110902aaed0SHisashi Hifumi for (i = 0; i < pagevec_count(pvec); i++) { 111902aaed0SHisashi Hifumi struct page *page = pvec->pages[i]; 112902aaed0SHisashi Hifumi struct zone *pagezone = page_zone(page); 113902aaed0SHisashi Hifumi 114902aaed0SHisashi Hifumi if (pagezone != zone) { 115902aaed0SHisashi Hifumi if (zone) 116902aaed0SHisashi Hifumi spin_unlock(&zone->lru_lock); 117902aaed0SHisashi Hifumi zone = pagezone; 118902aaed0SHisashi Hifumi spin_lock(&zone->lru_lock); 119902aaed0SHisashi Hifumi } 120894bc310SLee Schermerhorn if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 1214f98a2feSRik van Riel int lru = page_is_file_cache(page); 1224f98a2feSRik van Riel list_move_tail(&page->lru, &zone->lru[lru].list); 123902aaed0SHisashi Hifumi pgmoved++; 124902aaed0SHisashi Hifumi } 125902aaed0SHisashi Hifumi } 126902aaed0SHisashi Hifumi if (zone) 127902aaed0SHisashi Hifumi spin_unlock(&zone->lru_lock); 128902aaed0SHisashi Hifumi __count_vm_events(PGROTATED, pgmoved); 129902aaed0SHisashi Hifumi release_pages(pvec->pages, pvec->nr, pvec->cold); 130902aaed0SHisashi Hifumi pagevec_reinit(pvec); 131902aaed0SHisashi Hifumi } 132902aaed0SHisashi Hifumi 133902aaed0SHisashi Hifumi /* 1341da177e4SLinus Torvalds * Writeback is about to end against a page which has been marked for immediate 1351da177e4SLinus Torvalds * reclaim. If it still appears to be reclaimable, move it to the tail of the 136902aaed0SHisashi Hifumi * inactive list. 1371da177e4SLinus Torvalds */ 138ac6aadb2SMiklos Szeredi void rotate_reclaimable_page(struct page *page) 1391da177e4SLinus Torvalds { 140ac6aadb2SMiklos Szeredi if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && 141894bc310SLee Schermerhorn !PageUnevictable(page) && PageLRU(page)) { 142902aaed0SHisashi Hifumi struct pagevec *pvec; 1431da177e4SLinus Torvalds unsigned long flags; 1441da177e4SLinus Torvalds 145902aaed0SHisashi Hifumi page_cache_get(page); 146902aaed0SHisashi Hifumi local_irq_save(flags); 147902aaed0SHisashi Hifumi pvec = &__get_cpu_var(lru_rotate_pvecs); 148902aaed0SHisashi Hifumi if (!pagevec_add(pvec, page)) 149902aaed0SHisashi Hifumi pagevec_move_tail(pvec); 150902aaed0SHisashi Hifumi local_irq_restore(flags); 151ac6aadb2SMiklos Szeredi } 1521da177e4SLinus Torvalds } 1531da177e4SLinus Torvalds 1541da177e4SLinus Torvalds /* 1551da177e4SLinus Torvalds * FIXME: speed this up? 1561da177e4SLinus Torvalds */ 157920c7a5dSHarvey Harrison void activate_page(struct page *page) 1581da177e4SLinus Torvalds { 1591da177e4SLinus Torvalds struct zone *zone = page_zone(page); 1601da177e4SLinus Torvalds 1611da177e4SLinus Torvalds spin_lock_irq(&zone->lru_lock); 162894bc310SLee Schermerhorn if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 1634f98a2feSRik van Riel int file = page_is_file_cache(page); 1644f98a2feSRik van Riel int lru = LRU_BASE + file; 1654f98a2feSRik van Riel del_page_from_lru_list(zone, page, lru); 1664f98a2feSRik van Riel 1671da177e4SLinus Torvalds SetPageActive(page); 1684f98a2feSRik van Riel lru += LRU_ACTIVE; 1694f98a2feSRik van Riel add_page_to_lru_list(zone, page, lru); 170f8891e5eSChristoph Lameter __count_vm_event(PGACTIVATE); 171894bc310SLee Schermerhorn mem_cgroup_move_lists(page, lru); 1724f98a2feSRik van Riel 1734f98a2feSRik van Riel zone->recent_rotated[!!file]++; 1744f98a2feSRik van Riel zone->recent_scanned[!!file]++; 1751da177e4SLinus Torvalds } 1761da177e4SLinus Torvalds spin_unlock_irq(&zone->lru_lock); 1771da177e4SLinus Torvalds } 1781da177e4SLinus Torvalds 1791da177e4SLinus Torvalds /* 1801da177e4SLinus Torvalds * Mark a page as having seen activity. 1811da177e4SLinus Torvalds * 1821da177e4SLinus Torvalds * inactive,unreferenced -> inactive,referenced 1831da177e4SLinus Torvalds * inactive,referenced -> active,unreferenced 1841da177e4SLinus Torvalds * active,unreferenced -> active,referenced 1851da177e4SLinus Torvalds */ 186920c7a5dSHarvey Harrison void mark_page_accessed(struct page *page) 1871da177e4SLinus Torvalds { 188894bc310SLee Schermerhorn if (!PageActive(page) && !PageUnevictable(page) && 189894bc310SLee Schermerhorn PageReferenced(page) && PageLRU(page)) { 1901da177e4SLinus Torvalds activate_page(page); 1911da177e4SLinus Torvalds ClearPageReferenced(page); 1921da177e4SLinus Torvalds } else if (!PageReferenced(page)) { 1931da177e4SLinus Torvalds SetPageReferenced(page); 1941da177e4SLinus Torvalds } 1951da177e4SLinus Torvalds } 1961da177e4SLinus Torvalds 1971da177e4SLinus Torvalds EXPORT_SYMBOL(mark_page_accessed); 1981da177e4SLinus Torvalds 199f04e9ebbSKOSAKI Motohiro void __lru_cache_add(struct page *page, enum lru_list lru) 2001da177e4SLinus Torvalds { 201f04e9ebbSKOSAKI Motohiro struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; 2021da177e4SLinus Torvalds 2031da177e4SLinus Torvalds page_cache_get(page); 2041da177e4SLinus Torvalds if (!pagevec_add(pvec, page)) 205f04e9ebbSKOSAKI Motohiro ____pagevec_lru_add(pvec, lru); 2061da177e4SLinus Torvalds put_cpu_var(lru_add_pvecs); 2071da177e4SLinus Torvalds } 2081da177e4SLinus Torvalds 209f04e9ebbSKOSAKI Motohiro /** 210f04e9ebbSKOSAKI Motohiro * lru_cache_add_lru - add a page to a page list 211f04e9ebbSKOSAKI Motohiro * @page: the page to be added to the LRU. 212f04e9ebbSKOSAKI Motohiro * @lru: the LRU list to which the page is added. 213f04e9ebbSKOSAKI Motohiro */ 214f04e9ebbSKOSAKI Motohiro void lru_cache_add_lru(struct page *page, enum lru_list lru) 2151da177e4SLinus Torvalds { 216f04e9ebbSKOSAKI Motohiro if (PageActive(page)) { 217894bc310SLee Schermerhorn VM_BUG_ON(PageUnevictable(page)); 218f04e9ebbSKOSAKI Motohiro ClearPageActive(page); 219894bc310SLee Schermerhorn } else if (PageUnevictable(page)) { 220894bc310SLee Schermerhorn VM_BUG_ON(PageActive(page)); 221894bc310SLee Schermerhorn ClearPageUnevictable(page); 222f04e9ebbSKOSAKI Motohiro } 2231da177e4SLinus Torvalds 224894bc310SLee Schermerhorn VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); 225f04e9ebbSKOSAKI Motohiro __lru_cache_add(page, lru); 2261da177e4SLinus Torvalds } 2271da177e4SLinus Torvalds 228894bc310SLee Schermerhorn /** 229894bc310SLee Schermerhorn * add_page_to_unevictable_list - add a page to the unevictable list 230894bc310SLee Schermerhorn * @page: the page to be added to the unevictable list 231894bc310SLee Schermerhorn * 232894bc310SLee Schermerhorn * Add page directly to its zone's unevictable list. To avoid races with 233894bc310SLee Schermerhorn * tasks that might be making the page evictable, through eg. munlock, 234894bc310SLee Schermerhorn * munmap or exit, while it's not on the lru, we want to add the page 235894bc310SLee Schermerhorn * while it's locked or otherwise "invisible" to other tasks. This is 236894bc310SLee Schermerhorn * difficult to do when using the pagevec cache, so bypass that. 237894bc310SLee Schermerhorn */ 238894bc310SLee Schermerhorn void add_page_to_unevictable_list(struct page *page) 239894bc310SLee Schermerhorn { 240894bc310SLee Schermerhorn struct zone *zone = page_zone(page); 241894bc310SLee Schermerhorn 242894bc310SLee Schermerhorn spin_lock_irq(&zone->lru_lock); 243894bc310SLee Schermerhorn SetPageUnevictable(page); 244894bc310SLee Schermerhorn SetPageLRU(page); 245894bc310SLee Schermerhorn add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); 246894bc310SLee Schermerhorn spin_unlock_irq(&zone->lru_lock); 247894bc310SLee Schermerhorn } 248894bc310SLee Schermerhorn 249*64d6519dSLee Schermerhorn /** 250*64d6519dSLee Schermerhorn * lru_cache_add_active_or_unevictable 251*64d6519dSLee Schermerhorn * @page: the page to be added to LRU 252*64d6519dSLee Schermerhorn * @vma: vma in which page is mapped for determining reclaimability 253*64d6519dSLee Schermerhorn * 254*64d6519dSLee Schermerhorn * place @page on active or unevictable LRU list, depending on 255*64d6519dSLee Schermerhorn * page_evictable(). Note that if the page is not evictable, 256*64d6519dSLee Schermerhorn * it goes directly back onto it's zone's unevictable list. It does 257*64d6519dSLee Schermerhorn * NOT use a per cpu pagevec. 258*64d6519dSLee Schermerhorn */ 259*64d6519dSLee Schermerhorn void lru_cache_add_active_or_unevictable(struct page *page, 260*64d6519dSLee Schermerhorn struct vm_area_struct *vma) 261*64d6519dSLee Schermerhorn { 262*64d6519dSLee Schermerhorn if (page_evictable(page, vma)) 263*64d6519dSLee Schermerhorn lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page)); 264*64d6519dSLee Schermerhorn else 265*64d6519dSLee Schermerhorn add_page_to_unevictable_list(page); 266*64d6519dSLee Schermerhorn } 267*64d6519dSLee Schermerhorn 268902aaed0SHisashi Hifumi /* 269902aaed0SHisashi Hifumi * Drain pages out of the cpu's pagevecs. 270902aaed0SHisashi Hifumi * Either "cpu" is the current CPU, and preemption has already been 271902aaed0SHisashi Hifumi * disabled; or "cpu" is being hot-unplugged, and is already dead. 272902aaed0SHisashi Hifumi */ 273902aaed0SHisashi Hifumi static void drain_cpu_pagevecs(int cpu) 2741da177e4SLinus Torvalds { 275f04e9ebbSKOSAKI Motohiro struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu); 276902aaed0SHisashi Hifumi struct pagevec *pvec; 277f04e9ebbSKOSAKI Motohiro int lru; 2781da177e4SLinus Torvalds 279f04e9ebbSKOSAKI Motohiro for_each_lru(lru) { 280f04e9ebbSKOSAKI Motohiro pvec = &pvecs[lru - LRU_BASE]; 2811da177e4SLinus Torvalds if (pagevec_count(pvec)) 282f04e9ebbSKOSAKI Motohiro ____pagevec_lru_add(pvec, lru); 283f04e9ebbSKOSAKI Motohiro } 284902aaed0SHisashi Hifumi 285902aaed0SHisashi Hifumi pvec = &per_cpu(lru_rotate_pvecs, cpu); 286902aaed0SHisashi Hifumi if (pagevec_count(pvec)) { 287902aaed0SHisashi Hifumi unsigned long flags; 288902aaed0SHisashi Hifumi 289902aaed0SHisashi Hifumi /* No harm done if a racing interrupt already did this */ 290902aaed0SHisashi Hifumi local_irq_save(flags); 291902aaed0SHisashi Hifumi pagevec_move_tail(pvec); 292902aaed0SHisashi Hifumi local_irq_restore(flags); 293902aaed0SHisashi Hifumi } 29480bfed90SAndrew Morton } 29580bfed90SAndrew Morton 29680bfed90SAndrew Morton void lru_add_drain(void) 29780bfed90SAndrew Morton { 298902aaed0SHisashi Hifumi drain_cpu_pagevecs(get_cpu()); 29980bfed90SAndrew Morton put_cpu(); 3001da177e4SLinus Torvalds } 3011da177e4SLinus Torvalds 302b291f000SNick Piggin #if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU) 303c4028958SDavid Howells static void lru_add_drain_per_cpu(struct work_struct *dummy) 304053837fcSNick Piggin { 305053837fcSNick Piggin lru_add_drain(); 306053837fcSNick Piggin } 307053837fcSNick Piggin 308053837fcSNick Piggin /* 309053837fcSNick Piggin * Returns 0 for success 310053837fcSNick Piggin */ 311053837fcSNick Piggin int lru_add_drain_all(void) 312053837fcSNick Piggin { 313c4028958SDavid Howells return schedule_on_each_cpu(lru_add_drain_per_cpu); 314053837fcSNick Piggin } 315053837fcSNick Piggin 316053837fcSNick Piggin #else 317053837fcSNick Piggin 318053837fcSNick Piggin /* 319053837fcSNick Piggin * Returns 0 for success 320053837fcSNick Piggin */ 321053837fcSNick Piggin int lru_add_drain_all(void) 322053837fcSNick Piggin { 323053837fcSNick Piggin lru_add_drain(); 324053837fcSNick Piggin return 0; 325053837fcSNick Piggin } 326053837fcSNick Piggin #endif 327053837fcSNick Piggin 3281da177e4SLinus Torvalds /* 3291da177e4SLinus Torvalds * Batched page_cache_release(). Decrement the reference count on all the 3301da177e4SLinus Torvalds * passed pages. If it fell to zero then remove the page from the LRU and 3311da177e4SLinus Torvalds * free it. 3321da177e4SLinus Torvalds * 3331da177e4SLinus Torvalds * Avoid taking zone->lru_lock if possible, but if it is taken, retain it 3341da177e4SLinus Torvalds * for the remainder of the operation. 3351da177e4SLinus Torvalds * 336ab33dc09SFernando Luis Vazquez Cao * The locking in this function is against shrink_inactive_list(): we recheck 337ab33dc09SFernando Luis Vazquez Cao * the page count inside the lock to see whether shrink_inactive_list() 338ab33dc09SFernando Luis Vazquez Cao * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() 339ab33dc09SFernando Luis Vazquez Cao * will free it. 3401da177e4SLinus Torvalds */ 3411da177e4SLinus Torvalds void release_pages(struct page **pages, int nr, int cold) 3421da177e4SLinus Torvalds { 3431da177e4SLinus Torvalds int i; 3441da177e4SLinus Torvalds struct pagevec pages_to_free; 3451da177e4SLinus Torvalds struct zone *zone = NULL; 346902aaed0SHisashi Hifumi unsigned long uninitialized_var(flags); 3471da177e4SLinus Torvalds 3481da177e4SLinus Torvalds pagevec_init(&pages_to_free, cold); 3491da177e4SLinus Torvalds for (i = 0; i < nr; i++) { 3501da177e4SLinus Torvalds struct page *page = pages[i]; 3511da177e4SLinus Torvalds 3528519fb30SNick Piggin if (unlikely(PageCompound(page))) { 3538519fb30SNick Piggin if (zone) { 354902aaed0SHisashi Hifumi spin_unlock_irqrestore(&zone->lru_lock, flags); 3558519fb30SNick Piggin zone = NULL; 3568519fb30SNick Piggin } 3578519fb30SNick Piggin put_compound_page(page); 3588519fb30SNick Piggin continue; 3598519fb30SNick Piggin } 3608519fb30SNick Piggin 361b5810039SNick Piggin if (!put_page_testzero(page)) 3621da177e4SLinus Torvalds continue; 3631da177e4SLinus Torvalds 36446453a6eSNick Piggin if (PageLRU(page)) { 36546453a6eSNick Piggin struct zone *pagezone = page_zone(page); 366894bc310SLee Schermerhorn 3671da177e4SLinus Torvalds if (pagezone != zone) { 3681da177e4SLinus Torvalds if (zone) 369902aaed0SHisashi Hifumi spin_unlock_irqrestore(&zone->lru_lock, 370902aaed0SHisashi Hifumi flags); 3711da177e4SLinus Torvalds zone = pagezone; 372902aaed0SHisashi Hifumi spin_lock_irqsave(&zone->lru_lock, flags); 3731da177e4SLinus Torvalds } 374725d704eSNick Piggin VM_BUG_ON(!PageLRU(page)); 37567453911SNick Piggin __ClearPageLRU(page); 3761da177e4SLinus Torvalds del_page_from_lru(zone, page); 37746453a6eSNick Piggin } 37846453a6eSNick Piggin 3791da177e4SLinus Torvalds if (!pagevec_add(&pages_to_free, page)) { 38046453a6eSNick Piggin if (zone) { 381902aaed0SHisashi Hifumi spin_unlock_irqrestore(&zone->lru_lock, flags); 38246453a6eSNick Piggin zone = NULL; 38346453a6eSNick Piggin } 3841da177e4SLinus Torvalds __pagevec_free(&pages_to_free); 3851da177e4SLinus Torvalds pagevec_reinit(&pages_to_free); 3861da177e4SLinus Torvalds } 3871da177e4SLinus Torvalds } 3881da177e4SLinus Torvalds if (zone) 389902aaed0SHisashi Hifumi spin_unlock_irqrestore(&zone->lru_lock, flags); 3901da177e4SLinus Torvalds 3911da177e4SLinus Torvalds pagevec_free(&pages_to_free); 3921da177e4SLinus Torvalds } 3931da177e4SLinus Torvalds 3941da177e4SLinus Torvalds /* 3951da177e4SLinus Torvalds * The pages which we're about to release may be in the deferred lru-addition 3961da177e4SLinus Torvalds * queues. That would prevent them from really being freed right now. That's 3971da177e4SLinus Torvalds * OK from a correctness point of view but is inefficient - those pages may be 3981da177e4SLinus Torvalds * cache-warm and we want to give them back to the page allocator ASAP. 3991da177e4SLinus Torvalds * 4001da177e4SLinus Torvalds * So __pagevec_release() will drain those queues here. __pagevec_lru_add() 4011da177e4SLinus Torvalds * and __pagevec_lru_add_active() call release_pages() directly to avoid 4021da177e4SLinus Torvalds * mutual recursion. 4031da177e4SLinus Torvalds */ 4041da177e4SLinus Torvalds void __pagevec_release(struct pagevec *pvec) 4051da177e4SLinus Torvalds { 4061da177e4SLinus Torvalds lru_add_drain(); 4071da177e4SLinus Torvalds release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); 4081da177e4SLinus Torvalds pagevec_reinit(pvec); 4091da177e4SLinus Torvalds } 4101da177e4SLinus Torvalds 4117f285701SSteve French EXPORT_SYMBOL(__pagevec_release); 4127f285701SSteve French 4131da177e4SLinus Torvalds /* 4141da177e4SLinus Torvalds * pagevec_release() for pages which are known to not be on the LRU 4151da177e4SLinus Torvalds * 4161da177e4SLinus Torvalds * This function reinitialises the caller's pagevec. 4171da177e4SLinus Torvalds */ 4181da177e4SLinus Torvalds void __pagevec_release_nonlru(struct pagevec *pvec) 4191da177e4SLinus Torvalds { 4201da177e4SLinus Torvalds int i; 4211da177e4SLinus Torvalds struct pagevec pages_to_free; 4221da177e4SLinus Torvalds 4231da177e4SLinus Torvalds pagevec_init(&pages_to_free, pvec->cold); 4241da177e4SLinus Torvalds for (i = 0; i < pagevec_count(pvec); i++) { 4251da177e4SLinus Torvalds struct page *page = pvec->pages[i]; 4261da177e4SLinus Torvalds 427725d704eSNick Piggin VM_BUG_ON(PageLRU(page)); 4281da177e4SLinus Torvalds if (put_page_testzero(page)) 4291da177e4SLinus Torvalds pagevec_add(&pages_to_free, page); 4301da177e4SLinus Torvalds } 4311da177e4SLinus Torvalds pagevec_free(&pages_to_free); 4321da177e4SLinus Torvalds pagevec_reinit(pvec); 4331da177e4SLinus Torvalds } 4341da177e4SLinus Torvalds 4351da177e4SLinus Torvalds /* 4361da177e4SLinus Torvalds * Add the passed pages to the LRU, then drop the caller's refcount 4371da177e4SLinus Torvalds * on them. Reinitialises the caller's pagevec. 4381da177e4SLinus Torvalds */ 439f04e9ebbSKOSAKI Motohiro void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) 4401da177e4SLinus Torvalds { 4411da177e4SLinus Torvalds int i; 4421da177e4SLinus Torvalds struct zone *zone = NULL; 443894bc310SLee Schermerhorn VM_BUG_ON(is_unevictable_lru(lru)); 4441da177e4SLinus Torvalds 4451da177e4SLinus Torvalds for (i = 0; i < pagevec_count(pvec); i++) { 4461da177e4SLinus Torvalds struct page *page = pvec->pages[i]; 4471da177e4SLinus Torvalds struct zone *pagezone = page_zone(page); 4481da177e4SLinus Torvalds 4491da177e4SLinus Torvalds if (pagezone != zone) { 4501da177e4SLinus Torvalds if (zone) 4511da177e4SLinus Torvalds spin_unlock_irq(&zone->lru_lock); 4521da177e4SLinus Torvalds zone = pagezone; 4531da177e4SLinus Torvalds spin_lock_irq(&zone->lru_lock); 4541da177e4SLinus Torvalds } 455894bc310SLee Schermerhorn VM_BUG_ON(PageActive(page)); 456894bc310SLee Schermerhorn VM_BUG_ON(PageUnevictable(page)); 457725d704eSNick Piggin VM_BUG_ON(PageLRU(page)); 4588d438f96SNick Piggin SetPageLRU(page); 459f04e9ebbSKOSAKI Motohiro if (is_active_lru(lru)) 4604c84cacfSNick Piggin SetPageActive(page); 461f04e9ebbSKOSAKI Motohiro add_page_to_lru_list(zone, page, lru); 4621da177e4SLinus Torvalds } 4631da177e4SLinus Torvalds if (zone) 4641da177e4SLinus Torvalds spin_unlock_irq(&zone->lru_lock); 4651da177e4SLinus Torvalds release_pages(pvec->pages, pvec->nr, pvec->cold); 4661da177e4SLinus Torvalds pagevec_reinit(pvec); 4671da177e4SLinus Torvalds } 4681da177e4SLinus Torvalds 469f04e9ebbSKOSAKI Motohiro EXPORT_SYMBOL(____pagevec_lru_add); 470f04e9ebbSKOSAKI Motohiro 4711da177e4SLinus Torvalds /* 4721da177e4SLinus Torvalds * Try to drop buffers from the pages in a pagevec 4731da177e4SLinus Torvalds */ 4741da177e4SLinus Torvalds void pagevec_strip(struct pagevec *pvec) 4751da177e4SLinus Torvalds { 4761da177e4SLinus Torvalds int i; 4771da177e4SLinus Torvalds 4781da177e4SLinus Torvalds for (i = 0; i < pagevec_count(pvec); i++) { 4791da177e4SLinus Torvalds struct page *page = pvec->pages[i]; 4801da177e4SLinus Torvalds 481529ae9aaSNick Piggin if (PagePrivate(page) && trylock_page(page)) { 4825b40dc78SChristoph Lameter if (PagePrivate(page)) 4831da177e4SLinus Torvalds try_to_release_page(page, 0); 4841da177e4SLinus Torvalds unlock_page(page); 4851da177e4SLinus Torvalds } 4861da177e4SLinus Torvalds } 4871da177e4SLinus Torvalds } 4881da177e4SLinus Torvalds 4891da177e4SLinus Torvalds /** 49068a22394SRik van Riel * pagevec_swap_free - try to free swap space from the pages in a pagevec 49168a22394SRik van Riel * @pvec: pagevec with swapcache pages to free the swap space of 49268a22394SRik van Riel * 49368a22394SRik van Riel * The caller needs to hold an extra reference to each page and 49468a22394SRik van Riel * not hold the page lock on the pages. This function uses a 49568a22394SRik van Riel * trylock on the page lock so it may not always free the swap 49668a22394SRik van Riel * space associated with a page. 49768a22394SRik van Riel */ 49868a22394SRik van Riel void pagevec_swap_free(struct pagevec *pvec) 49968a22394SRik van Riel { 50068a22394SRik van Riel int i; 50168a22394SRik van Riel 50268a22394SRik van Riel for (i = 0; i < pagevec_count(pvec); i++) { 50368a22394SRik van Riel struct page *page = pvec->pages[i]; 50468a22394SRik van Riel 50568a22394SRik van Riel if (PageSwapCache(page) && trylock_page(page)) { 50668a22394SRik van Riel if (PageSwapCache(page)) 50768a22394SRik van Riel remove_exclusive_swap_page_ref(page); 50868a22394SRik van Riel unlock_page(page); 50968a22394SRik van Riel } 51068a22394SRik van Riel } 51168a22394SRik van Riel } 51268a22394SRik van Riel 51368a22394SRik van Riel /** 5141da177e4SLinus Torvalds * pagevec_lookup - gang pagecache lookup 5151da177e4SLinus Torvalds * @pvec: Where the resulting pages are placed 5161da177e4SLinus Torvalds * @mapping: The address_space to search 5171da177e4SLinus Torvalds * @start: The starting page index 5181da177e4SLinus Torvalds * @nr_pages: The maximum number of pages 5191da177e4SLinus Torvalds * 5201da177e4SLinus Torvalds * pagevec_lookup() will search for and return a group of up to @nr_pages pages 5211da177e4SLinus Torvalds * in the mapping. The pages are placed in @pvec. pagevec_lookup() takes a 5221da177e4SLinus Torvalds * reference against the pages in @pvec. 5231da177e4SLinus Torvalds * 5241da177e4SLinus Torvalds * The search returns a group of mapping-contiguous pages with ascending 5251da177e4SLinus Torvalds * indexes. There may be holes in the indices due to not-present pages. 5261da177e4SLinus Torvalds * 5271da177e4SLinus Torvalds * pagevec_lookup() returns the number of pages which were found. 5281da177e4SLinus Torvalds */ 5291da177e4SLinus Torvalds unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, 5301da177e4SLinus Torvalds pgoff_t start, unsigned nr_pages) 5311da177e4SLinus Torvalds { 5321da177e4SLinus Torvalds pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages); 5331da177e4SLinus Torvalds return pagevec_count(pvec); 5341da177e4SLinus Torvalds } 5351da177e4SLinus Torvalds 53678539fdfSChristoph Hellwig EXPORT_SYMBOL(pagevec_lookup); 53778539fdfSChristoph Hellwig 5381da177e4SLinus Torvalds unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, 5391da177e4SLinus Torvalds pgoff_t *index, int tag, unsigned nr_pages) 5401da177e4SLinus Torvalds { 5411da177e4SLinus Torvalds pvec->nr = find_get_pages_tag(mapping, index, tag, 5421da177e4SLinus Torvalds nr_pages, pvec->pages); 5431da177e4SLinus Torvalds return pagevec_count(pvec); 5441da177e4SLinus Torvalds } 5451da177e4SLinus Torvalds 5467f285701SSteve French EXPORT_SYMBOL(pagevec_lookup_tag); 5471da177e4SLinus Torvalds 5481da177e4SLinus Torvalds #ifdef CONFIG_SMP 5491da177e4SLinus Torvalds /* 5501da177e4SLinus Torvalds * We tolerate a little inaccuracy to avoid ping-ponging the counter between 5511da177e4SLinus Torvalds * CPUs 5521da177e4SLinus Torvalds */ 5531da177e4SLinus Torvalds #define ACCT_THRESHOLD max(16, NR_CPUS * 2) 5541da177e4SLinus Torvalds 555f84f9504SVegard Nossum static DEFINE_PER_CPU(long, committed_space); 5561da177e4SLinus Torvalds 5571da177e4SLinus Torvalds void vm_acct_memory(long pages) 5581da177e4SLinus Torvalds { 5591da177e4SLinus Torvalds long *local; 5601da177e4SLinus Torvalds 5611da177e4SLinus Torvalds preempt_disable(); 5621da177e4SLinus Torvalds local = &__get_cpu_var(committed_space); 5631da177e4SLinus Torvalds *local += pages; 5641da177e4SLinus Torvalds if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { 56580119ef5SAlan Cox atomic_long_add(*local, &vm_committed_space); 5661da177e4SLinus Torvalds *local = 0; 5671da177e4SLinus Torvalds } 5681da177e4SLinus Torvalds preempt_enable(); 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds 5711da177e4SLinus Torvalds #ifdef CONFIG_HOTPLUG_CPU 5721da177e4SLinus Torvalds 5731da177e4SLinus Torvalds /* Drop the CPU's cached committed space back into the central pool. */ 5741da177e4SLinus Torvalds static int cpu_swap_callback(struct notifier_block *nfb, 5751da177e4SLinus Torvalds unsigned long action, 5761da177e4SLinus Torvalds void *hcpu) 5771da177e4SLinus Torvalds { 5781da177e4SLinus Torvalds long *committed; 5791da177e4SLinus Torvalds 5801da177e4SLinus Torvalds committed = &per_cpu(committed_space, (long)hcpu); 5818bb78442SRafael J. Wysocki if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 58280119ef5SAlan Cox atomic_long_add(*committed, &vm_committed_space); 5831da177e4SLinus Torvalds *committed = 0; 584902aaed0SHisashi Hifumi drain_cpu_pagevecs((long)hcpu); 5851da177e4SLinus Torvalds } 5861da177e4SLinus Torvalds return NOTIFY_OK; 5871da177e4SLinus Torvalds } 5881da177e4SLinus Torvalds #endif /* CONFIG_HOTPLUG_CPU */ 5891da177e4SLinus Torvalds #endif /* CONFIG_SMP */ 5901da177e4SLinus Torvalds 5911da177e4SLinus Torvalds /* 5921da177e4SLinus Torvalds * Perform any setup for the swap system 5931da177e4SLinus Torvalds */ 5941da177e4SLinus Torvalds void __init swap_setup(void) 5951da177e4SLinus Torvalds { 5961da177e4SLinus Torvalds unsigned long megs = num_physpages >> (20 - PAGE_SHIFT); 5971da177e4SLinus Torvalds 598e0bf68ddSPeter Zijlstra #ifdef CONFIG_SWAP 599e0bf68ddSPeter Zijlstra bdi_init(swapper_space.backing_dev_info); 600e0bf68ddSPeter Zijlstra #endif 601e0bf68ddSPeter Zijlstra 6021da177e4SLinus Torvalds /* Use a smaller cluster for small-memory machines */ 6031da177e4SLinus Torvalds if (megs < 16) 6041da177e4SLinus Torvalds page_cluster = 2; 6051da177e4SLinus Torvalds else 6061da177e4SLinus Torvalds page_cluster = 3; 6071da177e4SLinus Torvalds /* 6081da177e4SLinus Torvalds * Right now other parts of the system means that we 6091da177e4SLinus Torvalds * _really_ don't want to cluster much more 6101da177e4SLinus Torvalds */ 61102316067SIngo Molnar #ifdef CONFIG_HOTPLUG_CPU 6121da177e4SLinus Torvalds hotcpu_notifier(cpu_swap_callback, 0); 61302316067SIngo Molnar #endif 6141da177e4SLinus Torvalds } 615