11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/swap.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 51da177e4SLinus Torvalds */ 61da177e4SLinus Torvalds 71da177e4SLinus Torvalds /* 8183ff22bSSimon Arlott * This file contains the default values for the operation of the 91da177e4SLinus Torvalds * Linux VM subsystem. Fine-tuning documentation can be found in 101da177e4SLinus Torvalds * Documentation/sysctl/vm.txt. 111da177e4SLinus Torvalds * Started 18.12.91 121da177e4SLinus Torvalds * Swap aging added 23.2.95, Stephen Tweedie. 131da177e4SLinus Torvalds * Buffermem limits added 12.3.98, Rik van Riel. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <linux/mm.h> 171da177e4SLinus Torvalds #include <linux/sched.h> 181da177e4SLinus Torvalds #include <linux/kernel_stat.h> 191da177e4SLinus Torvalds #include <linux/swap.h> 201da177e4SLinus Torvalds #include <linux/mman.h> 211da177e4SLinus Torvalds #include <linux/pagemap.h> 221da177e4SLinus Torvalds #include <linux/pagevec.h> 231da177e4SLinus Torvalds #include <linux/init.h> 241da177e4SLinus Torvalds #include <linux/module.h> 251da177e4SLinus Torvalds #include <linux/mm_inline.h> 261da177e4SLinus Torvalds #include <linux/buffer_head.h> /* for try_to_release_page() */ 271da177e4SLinus Torvalds #include <linux/percpu_counter.h> 281da177e4SLinus Torvalds #include <linux/percpu.h> 291da177e4SLinus Torvalds #include <linux/cpu.h> 301da177e4SLinus Torvalds #include <linux/notifier.h> 31e0bf68ddSPeter Zijlstra #include <linux/backing-dev.h> 3266e1707bSBalbir Singh #include <linux/memcontrol.h> 335a0e3ad6STejun Heo #include <linux/gfp.h> 341da177e4SLinus Torvalds 3564d6519dSLee Schermerhorn #include "internal.h" 3664d6519dSLee Schermerhorn 371da177e4SLinus Torvalds /* How many pages do we try to swap or page in/out together? */ 381da177e4SLinus Torvalds int page_cluster; 391da177e4SLinus Torvalds 40f04e9ebbSKOSAKI Motohiro static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); 41f84f9504SVegard Nossum static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); 42902aaed0SHisashi Hifumi 43b221385bSAdrian Bunk /* 44b221385bSAdrian Bunk * This path almost never happens for VM activity - pages are normally 45b221385bSAdrian Bunk * freed via pagevecs. But it gets used by networking. 46b221385bSAdrian Bunk */ 47920c7a5dSHarvey Harrison static void __page_cache_release(struct page *page) 48b221385bSAdrian Bunk { 49b221385bSAdrian Bunk if (PageLRU(page)) { 50b221385bSAdrian Bunk unsigned long flags; 51b221385bSAdrian Bunk struct zone *zone = page_zone(page); 52b221385bSAdrian Bunk 53b221385bSAdrian Bunk spin_lock_irqsave(&zone->lru_lock, flags); 54b221385bSAdrian Bunk VM_BUG_ON(!PageLRU(page)); 55b221385bSAdrian Bunk __ClearPageLRU(page); 56b221385bSAdrian Bunk del_page_from_lru(zone, page); 57b221385bSAdrian Bunk spin_unlock_irqrestore(&zone->lru_lock, flags); 58b221385bSAdrian Bunk } 5991807063SAndrea Arcangeli } 6091807063SAndrea Arcangeli 6191807063SAndrea Arcangeli static void __put_single_page(struct page *page) 6291807063SAndrea Arcangeli { 6391807063SAndrea Arcangeli __page_cache_release(page); 64fc91668eSLi Hong free_hot_cold_page(page, 0); 65b221385bSAdrian Bunk } 66b221385bSAdrian Bunk 6791807063SAndrea Arcangeli static void __put_compound_page(struct page *page) 6891807063SAndrea Arcangeli { 6991807063SAndrea Arcangeli compound_page_dtor *dtor; 7091807063SAndrea Arcangeli 7191807063SAndrea Arcangeli __page_cache_release(page); 7291807063SAndrea Arcangeli dtor = get_compound_page_dtor(page); 7391807063SAndrea Arcangeli (*dtor)(page); 7491807063SAndrea Arcangeli } 7591807063SAndrea Arcangeli 768519fb30SNick Piggin static void put_compound_page(struct page *page) 771da177e4SLinus Torvalds { 7891807063SAndrea Arcangeli if (unlikely(PageTail(page))) { 7991807063SAndrea Arcangeli /* __split_huge_page_refcount can run under us */ 8091807063SAndrea Arcangeli struct page *page_head = page->first_page; 8191807063SAndrea Arcangeli smp_rmb(); 8291807063SAndrea Arcangeli /* 8391807063SAndrea Arcangeli * If PageTail is still set after smp_rmb() we can be sure 8491807063SAndrea Arcangeli * that the page->first_page we read wasn't a dangling pointer. 8591807063SAndrea Arcangeli * See __split_huge_page_refcount() smp_wmb(). 8691807063SAndrea Arcangeli */ 8791807063SAndrea Arcangeli if (likely(PageTail(page) && get_page_unless_zero(page_head))) { 8891807063SAndrea Arcangeli unsigned long flags; 8991807063SAndrea Arcangeli /* 9091807063SAndrea Arcangeli * Verify that our page_head wasn't converted 9191807063SAndrea Arcangeli * to a a regular page before we got a 9291807063SAndrea Arcangeli * reference on it. 9391807063SAndrea Arcangeli */ 9491807063SAndrea Arcangeli if (unlikely(!PageHead(page_head))) { 9591807063SAndrea Arcangeli /* PageHead is cleared after PageTail */ 9691807063SAndrea Arcangeli smp_rmb(); 9791807063SAndrea Arcangeli VM_BUG_ON(PageTail(page)); 9891807063SAndrea Arcangeli goto out_put_head; 9991807063SAndrea Arcangeli } 10091807063SAndrea Arcangeli /* 10191807063SAndrea Arcangeli * Only run compound_lock on a valid PageHead, 10291807063SAndrea Arcangeli * after having it pinned with 10391807063SAndrea Arcangeli * get_page_unless_zero() above. 10491807063SAndrea Arcangeli */ 10591807063SAndrea Arcangeli smp_mb(); 10691807063SAndrea Arcangeli /* page_head wasn't a dangling pointer */ 10791807063SAndrea Arcangeli flags = compound_lock_irqsave(page_head); 10891807063SAndrea Arcangeli if (unlikely(!PageTail(page))) { 10991807063SAndrea Arcangeli /* __split_huge_page_refcount run before us */ 11091807063SAndrea Arcangeli compound_unlock_irqrestore(page_head, flags); 11191807063SAndrea Arcangeli VM_BUG_ON(PageHead(page_head)); 11291807063SAndrea Arcangeli out_put_head: 11391807063SAndrea Arcangeli if (put_page_testzero(page_head)) 11491807063SAndrea Arcangeli __put_single_page(page_head); 11591807063SAndrea Arcangeli out_put_single: 11691807063SAndrea Arcangeli if (put_page_testzero(page)) 11791807063SAndrea Arcangeli __put_single_page(page); 11891807063SAndrea Arcangeli return; 11991807063SAndrea Arcangeli } 12091807063SAndrea Arcangeli VM_BUG_ON(page_head != page->first_page); 12191807063SAndrea Arcangeli /* 12291807063SAndrea Arcangeli * We can release the refcount taken by 12391807063SAndrea Arcangeli * get_page_unless_zero now that 12491807063SAndrea Arcangeli * split_huge_page_refcount is blocked on the 12591807063SAndrea Arcangeli * compound_lock. 12691807063SAndrea Arcangeli */ 12791807063SAndrea Arcangeli if (put_page_testzero(page_head)) 12891807063SAndrea Arcangeli VM_BUG_ON(1); 12991807063SAndrea Arcangeli /* __split_huge_page_refcount will wait now */ 13091807063SAndrea Arcangeli VM_BUG_ON(atomic_read(&page->_count) <= 0); 13191807063SAndrea Arcangeli atomic_dec(&page->_count); 13291807063SAndrea Arcangeli VM_BUG_ON(atomic_read(&page_head->_count) <= 0); 13391807063SAndrea Arcangeli compound_unlock_irqrestore(page_head, flags); 134a95a82e9SAndrea Arcangeli if (put_page_testzero(page_head)) { 135a95a82e9SAndrea Arcangeli if (PageHead(page_head)) 13691807063SAndrea Arcangeli __put_compound_page(page_head); 137a95a82e9SAndrea Arcangeli else 138a95a82e9SAndrea Arcangeli __put_single_page(page_head); 139a95a82e9SAndrea Arcangeli } 14091807063SAndrea Arcangeli } else { 14191807063SAndrea Arcangeli /* page_head is a dangling pointer */ 14291807063SAndrea Arcangeli VM_BUG_ON(PageTail(page)); 14391807063SAndrea Arcangeli goto out_put_single; 14491807063SAndrea Arcangeli } 14591807063SAndrea Arcangeli } else if (put_page_testzero(page)) { 14691807063SAndrea Arcangeli if (PageHead(page)) 14791807063SAndrea Arcangeli __put_compound_page(page); 14891807063SAndrea Arcangeli else 14991807063SAndrea Arcangeli __put_single_page(page); 1501da177e4SLinus Torvalds } 1511da177e4SLinus Torvalds } 1528519fb30SNick Piggin 1538519fb30SNick Piggin void put_page(struct page *page) 1548519fb30SNick Piggin { 1558519fb30SNick Piggin if (unlikely(PageCompound(page))) 1568519fb30SNick Piggin put_compound_page(page); 1578519fb30SNick Piggin else if (put_page_testzero(page)) 15891807063SAndrea Arcangeli __put_single_page(page); 1591da177e4SLinus Torvalds } 1601da177e4SLinus Torvalds EXPORT_SYMBOL(put_page); 1611da177e4SLinus Torvalds 1621d7ea732SAlexander Zarochentsev /** 1637682486bSRandy Dunlap * put_pages_list() - release a list of pages 1647682486bSRandy Dunlap * @pages: list of pages threaded on page->lru 1651d7ea732SAlexander Zarochentsev * 1661d7ea732SAlexander Zarochentsev * Release a list of pages which are strung together on page.lru. Currently 1671d7ea732SAlexander Zarochentsev * used by read_cache_pages() and related error recovery code. 1681d7ea732SAlexander Zarochentsev */ 1691d7ea732SAlexander Zarochentsev void put_pages_list(struct list_head *pages) 1701d7ea732SAlexander Zarochentsev { 1711d7ea732SAlexander Zarochentsev while (!list_empty(pages)) { 1721d7ea732SAlexander Zarochentsev struct page *victim; 1731d7ea732SAlexander Zarochentsev 1741d7ea732SAlexander Zarochentsev victim = list_entry(pages->prev, struct page, lru); 1751d7ea732SAlexander Zarochentsev list_del(&victim->lru); 1761d7ea732SAlexander Zarochentsev page_cache_release(victim); 1771d7ea732SAlexander Zarochentsev } 1781d7ea732SAlexander Zarochentsev } 1791d7ea732SAlexander Zarochentsev EXPORT_SYMBOL(put_pages_list); 1801d7ea732SAlexander Zarochentsev 181d8505deeSShaohua Li static void pagevec_lru_move_fn(struct pagevec *pvec, 182d8505deeSShaohua Li void (*move_fn)(struct page *page, void *arg), 183d8505deeSShaohua Li void *arg) 184902aaed0SHisashi Hifumi { 185902aaed0SHisashi Hifumi int i; 186902aaed0SHisashi Hifumi struct zone *zone = NULL; 187d8505deeSShaohua Li unsigned long flags = 0; 188902aaed0SHisashi Hifumi 189902aaed0SHisashi Hifumi for (i = 0; i < pagevec_count(pvec); i++) { 190902aaed0SHisashi Hifumi struct page *page = pvec->pages[i]; 191902aaed0SHisashi Hifumi struct zone *pagezone = page_zone(page); 192902aaed0SHisashi Hifumi 193902aaed0SHisashi Hifumi if (pagezone != zone) { 194902aaed0SHisashi Hifumi if (zone) 195d8505deeSShaohua Li spin_unlock_irqrestore(&zone->lru_lock, flags); 196902aaed0SHisashi Hifumi zone = pagezone; 197d8505deeSShaohua Li spin_lock_irqsave(&zone->lru_lock, flags); 198902aaed0SHisashi Hifumi } 199d8505deeSShaohua Li 200d8505deeSShaohua Li (*move_fn)(page, arg); 201d8505deeSShaohua Li } 202d8505deeSShaohua Li if (zone) 203d8505deeSShaohua Li spin_unlock_irqrestore(&zone->lru_lock, flags); 204d8505deeSShaohua Li release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); 205d8505deeSShaohua Li pagevec_reinit(pvec); 206d8505deeSShaohua Li } 207d8505deeSShaohua Li 208d8505deeSShaohua Li static void pagevec_move_tail_fn(struct page *page, void *arg) 209d8505deeSShaohua Li { 210d8505deeSShaohua Li int *pgmoved = arg; 211d8505deeSShaohua Li struct zone *zone = page_zone(page); 212d8505deeSShaohua Li 213894bc310SLee Schermerhorn if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 214401a8e1cSJohannes Weiner int lru = page_lru_base_type(page); 2154f98a2feSRik van Riel list_move_tail(&page->lru, &zone->lru[lru].list); 216d8505deeSShaohua Li (*pgmoved)++; 217902aaed0SHisashi Hifumi } 218902aaed0SHisashi Hifumi } 219d8505deeSShaohua Li 220d8505deeSShaohua Li /* 221d8505deeSShaohua Li * pagevec_move_tail() must be called with IRQ disabled. 222d8505deeSShaohua Li * Otherwise this may cause nasty races. 223d8505deeSShaohua Li */ 224d8505deeSShaohua Li static void pagevec_move_tail(struct pagevec *pvec) 225d8505deeSShaohua Li { 226d8505deeSShaohua Li int pgmoved = 0; 227d8505deeSShaohua Li 228d8505deeSShaohua Li pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved); 229902aaed0SHisashi Hifumi __count_vm_events(PGROTATED, pgmoved); 230902aaed0SHisashi Hifumi } 231902aaed0SHisashi Hifumi 232902aaed0SHisashi Hifumi /* 2331da177e4SLinus Torvalds * Writeback is about to end against a page which has been marked for immediate 2341da177e4SLinus Torvalds * reclaim. If it still appears to be reclaimable, move it to the tail of the 235902aaed0SHisashi Hifumi * inactive list. 2361da177e4SLinus Torvalds */ 237ac6aadb2SMiklos Szeredi void rotate_reclaimable_page(struct page *page) 2381da177e4SLinus Torvalds { 239ac6aadb2SMiklos Szeredi if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && 240894bc310SLee Schermerhorn !PageUnevictable(page) && PageLRU(page)) { 241902aaed0SHisashi Hifumi struct pagevec *pvec; 2421da177e4SLinus Torvalds unsigned long flags; 2431da177e4SLinus Torvalds 244902aaed0SHisashi Hifumi page_cache_get(page); 245902aaed0SHisashi Hifumi local_irq_save(flags); 246902aaed0SHisashi Hifumi pvec = &__get_cpu_var(lru_rotate_pvecs); 247902aaed0SHisashi Hifumi if (!pagevec_add(pvec, page)) 248902aaed0SHisashi Hifumi pagevec_move_tail(pvec); 249902aaed0SHisashi Hifumi local_irq_restore(flags); 250ac6aadb2SMiklos Szeredi } 2511da177e4SLinus Torvalds } 2521da177e4SLinus Torvalds 2533e2f41f1SKOSAKI Motohiro static void update_page_reclaim_stat(struct zone *zone, struct page *page, 2543e2f41f1SKOSAKI Motohiro int file, int rotated) 2553e2f41f1SKOSAKI Motohiro { 2563e2f41f1SKOSAKI Motohiro struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat; 2573e2f41f1SKOSAKI Motohiro struct zone_reclaim_stat *memcg_reclaim_stat; 2583e2f41f1SKOSAKI Motohiro 2593e2f41f1SKOSAKI Motohiro memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page); 2603e2f41f1SKOSAKI Motohiro 2613e2f41f1SKOSAKI Motohiro reclaim_stat->recent_scanned[file]++; 2623e2f41f1SKOSAKI Motohiro if (rotated) 2633e2f41f1SKOSAKI Motohiro reclaim_stat->recent_rotated[file]++; 2643e2f41f1SKOSAKI Motohiro 2653e2f41f1SKOSAKI Motohiro if (!memcg_reclaim_stat) 2663e2f41f1SKOSAKI Motohiro return; 2673e2f41f1SKOSAKI Motohiro 2683e2f41f1SKOSAKI Motohiro memcg_reclaim_stat->recent_scanned[file]++; 2693e2f41f1SKOSAKI Motohiro if (rotated) 2703e2f41f1SKOSAKI Motohiro memcg_reclaim_stat->recent_rotated[file]++; 2713e2f41f1SKOSAKI Motohiro } 2723e2f41f1SKOSAKI Motohiro 2731da177e4SLinus Torvalds /* 274*7a608572SLinus Torvalds * FIXME: speed this up? 2751da177e4SLinus Torvalds */ 276*7a608572SLinus Torvalds void activate_page(struct page *page) 277744ed144SShaohua Li { 278744ed144SShaohua Li struct zone *zone = page_zone(page); 279*7a608572SLinus Torvalds 280*7a608572SLinus Torvalds spin_lock_irq(&zone->lru_lock); 281*7a608572SLinus Torvalds if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 282744ed144SShaohua Li int file = page_is_file_cache(page); 283744ed144SShaohua Li int lru = page_lru_base_type(page); 284744ed144SShaohua Li del_page_from_lru_list(zone, page, lru); 285744ed144SShaohua Li 286744ed144SShaohua Li SetPageActive(page); 287744ed144SShaohua Li lru += LRU_ACTIVE; 288744ed144SShaohua Li add_page_to_lru_list(zone, page, lru); 289744ed144SShaohua Li __count_vm_event(PGACTIVATE); 290*7a608572SLinus Torvalds 291744ed144SShaohua Li update_page_reclaim_stat(zone, page, file, 1); 292744ed144SShaohua Li } 2931da177e4SLinus Torvalds spin_unlock_irq(&zone->lru_lock); 2941da177e4SLinus Torvalds } 2951da177e4SLinus Torvalds 2961da177e4SLinus Torvalds /* 2971da177e4SLinus Torvalds * Mark a page as having seen activity. 2981da177e4SLinus Torvalds * 2991da177e4SLinus Torvalds * inactive,unreferenced -> inactive,referenced 3001da177e4SLinus Torvalds * inactive,referenced -> active,unreferenced 3011da177e4SLinus Torvalds * active,unreferenced -> active,referenced 3021da177e4SLinus Torvalds */ 303920c7a5dSHarvey Harrison void mark_page_accessed(struct page *page) 3041da177e4SLinus Torvalds { 305894bc310SLee Schermerhorn if (!PageActive(page) && !PageUnevictable(page) && 306894bc310SLee Schermerhorn PageReferenced(page) && PageLRU(page)) { 3071da177e4SLinus Torvalds activate_page(page); 3081da177e4SLinus Torvalds ClearPageReferenced(page); 3091da177e4SLinus Torvalds } else if (!PageReferenced(page)) { 3101da177e4SLinus Torvalds SetPageReferenced(page); 3111da177e4SLinus Torvalds } 3121da177e4SLinus Torvalds } 3131da177e4SLinus Torvalds 3141da177e4SLinus Torvalds EXPORT_SYMBOL(mark_page_accessed); 3151da177e4SLinus Torvalds 316f04e9ebbSKOSAKI Motohiro void __lru_cache_add(struct page *page, enum lru_list lru) 3171da177e4SLinus Torvalds { 318f04e9ebbSKOSAKI Motohiro struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; 3191da177e4SLinus Torvalds 3201da177e4SLinus Torvalds page_cache_get(page); 3211da177e4SLinus Torvalds if (!pagevec_add(pvec, page)) 322f04e9ebbSKOSAKI Motohiro ____pagevec_lru_add(pvec, lru); 3231da177e4SLinus Torvalds put_cpu_var(lru_add_pvecs); 3241da177e4SLinus Torvalds } 32547846b06SMiklos Szeredi EXPORT_SYMBOL(__lru_cache_add); 3261da177e4SLinus Torvalds 327f04e9ebbSKOSAKI Motohiro /** 328f04e9ebbSKOSAKI Motohiro * lru_cache_add_lru - add a page to a page list 329f04e9ebbSKOSAKI Motohiro * @page: the page to be added to the LRU. 330f04e9ebbSKOSAKI Motohiro * @lru: the LRU list to which the page is added. 331f04e9ebbSKOSAKI Motohiro */ 332f04e9ebbSKOSAKI Motohiro void lru_cache_add_lru(struct page *page, enum lru_list lru) 3331da177e4SLinus Torvalds { 334f04e9ebbSKOSAKI Motohiro if (PageActive(page)) { 335894bc310SLee Schermerhorn VM_BUG_ON(PageUnevictable(page)); 336f04e9ebbSKOSAKI Motohiro ClearPageActive(page); 337894bc310SLee Schermerhorn } else if (PageUnevictable(page)) { 338894bc310SLee Schermerhorn VM_BUG_ON(PageActive(page)); 339894bc310SLee Schermerhorn ClearPageUnevictable(page); 340f04e9ebbSKOSAKI Motohiro } 3411da177e4SLinus Torvalds 342894bc310SLee Schermerhorn VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); 343f04e9ebbSKOSAKI Motohiro __lru_cache_add(page, lru); 3441da177e4SLinus Torvalds } 3451da177e4SLinus Torvalds 346894bc310SLee Schermerhorn /** 347894bc310SLee Schermerhorn * add_page_to_unevictable_list - add a page to the unevictable list 348894bc310SLee Schermerhorn * @page: the page to be added to the unevictable list 349894bc310SLee Schermerhorn * 350894bc310SLee Schermerhorn * Add page directly to its zone's unevictable list. To avoid races with 351894bc310SLee Schermerhorn * tasks that might be making the page evictable, through eg. munlock, 352894bc310SLee Schermerhorn * munmap or exit, while it's not on the lru, we want to add the page 353894bc310SLee Schermerhorn * while it's locked or otherwise "invisible" to other tasks. This is 354894bc310SLee Schermerhorn * difficult to do when using the pagevec cache, so bypass that. 355894bc310SLee Schermerhorn */ 356894bc310SLee Schermerhorn void add_page_to_unevictable_list(struct page *page) 357894bc310SLee Schermerhorn { 358894bc310SLee Schermerhorn struct zone *zone = page_zone(page); 359894bc310SLee Schermerhorn 360894bc310SLee Schermerhorn spin_lock_irq(&zone->lru_lock); 361894bc310SLee Schermerhorn SetPageUnevictable(page); 362894bc310SLee Schermerhorn SetPageLRU(page); 363894bc310SLee Schermerhorn add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); 364894bc310SLee Schermerhorn spin_unlock_irq(&zone->lru_lock); 365894bc310SLee Schermerhorn } 366894bc310SLee Schermerhorn 367902aaed0SHisashi Hifumi /* 368902aaed0SHisashi Hifumi * Drain pages out of the cpu's pagevecs. 369902aaed0SHisashi Hifumi * Either "cpu" is the current CPU, and preemption has already been 370902aaed0SHisashi Hifumi * disabled; or "cpu" is being hot-unplugged, and is already dead. 371902aaed0SHisashi Hifumi */ 372902aaed0SHisashi Hifumi static void drain_cpu_pagevecs(int cpu) 3731da177e4SLinus Torvalds { 374f04e9ebbSKOSAKI Motohiro struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu); 375902aaed0SHisashi Hifumi struct pagevec *pvec; 376f04e9ebbSKOSAKI Motohiro int lru; 3771da177e4SLinus Torvalds 378f04e9ebbSKOSAKI Motohiro for_each_lru(lru) { 379f04e9ebbSKOSAKI Motohiro pvec = &pvecs[lru - LRU_BASE]; 3801da177e4SLinus Torvalds if (pagevec_count(pvec)) 381f04e9ebbSKOSAKI Motohiro ____pagevec_lru_add(pvec, lru); 382f04e9ebbSKOSAKI Motohiro } 383902aaed0SHisashi Hifumi 384902aaed0SHisashi Hifumi pvec = &per_cpu(lru_rotate_pvecs, cpu); 385902aaed0SHisashi Hifumi if (pagevec_count(pvec)) { 386902aaed0SHisashi Hifumi unsigned long flags; 387902aaed0SHisashi Hifumi 388902aaed0SHisashi Hifumi /* No harm done if a racing interrupt already did this */ 389902aaed0SHisashi Hifumi local_irq_save(flags); 390902aaed0SHisashi Hifumi pagevec_move_tail(pvec); 391902aaed0SHisashi Hifumi local_irq_restore(flags); 392902aaed0SHisashi Hifumi } 39380bfed90SAndrew Morton } 39480bfed90SAndrew Morton 39580bfed90SAndrew Morton void lru_add_drain(void) 39680bfed90SAndrew Morton { 397902aaed0SHisashi Hifumi drain_cpu_pagevecs(get_cpu()); 39880bfed90SAndrew Morton put_cpu(); 3991da177e4SLinus Torvalds } 4001da177e4SLinus Torvalds 401c4028958SDavid Howells static void lru_add_drain_per_cpu(struct work_struct *dummy) 402053837fcSNick Piggin { 403053837fcSNick Piggin lru_add_drain(); 404053837fcSNick Piggin } 405053837fcSNick Piggin 406053837fcSNick Piggin /* 407053837fcSNick Piggin * Returns 0 for success 408053837fcSNick Piggin */ 409053837fcSNick Piggin int lru_add_drain_all(void) 410053837fcSNick Piggin { 411c4028958SDavid Howells return schedule_on_each_cpu(lru_add_drain_per_cpu); 412053837fcSNick Piggin } 413053837fcSNick Piggin 4141da177e4SLinus Torvalds /* 4151da177e4SLinus Torvalds * Batched page_cache_release(). Decrement the reference count on all the 4161da177e4SLinus Torvalds * passed pages. If it fell to zero then remove the page from the LRU and 4171da177e4SLinus Torvalds * free it. 4181da177e4SLinus Torvalds * 4191da177e4SLinus Torvalds * Avoid taking zone->lru_lock if possible, but if it is taken, retain it 4201da177e4SLinus Torvalds * for the remainder of the operation. 4211da177e4SLinus Torvalds * 422ab33dc09SFernando Luis Vazquez Cao * The locking in this function is against shrink_inactive_list(): we recheck 423ab33dc09SFernando Luis Vazquez Cao * the page count inside the lock to see whether shrink_inactive_list() 424ab33dc09SFernando Luis Vazquez Cao * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() 425ab33dc09SFernando Luis Vazquez Cao * will free it. 4261da177e4SLinus Torvalds */ 4271da177e4SLinus Torvalds void release_pages(struct page **pages, int nr, int cold) 4281da177e4SLinus Torvalds { 4291da177e4SLinus Torvalds int i; 4301da177e4SLinus Torvalds struct pagevec pages_to_free; 4311da177e4SLinus Torvalds struct zone *zone = NULL; 432902aaed0SHisashi Hifumi unsigned long uninitialized_var(flags); 4331da177e4SLinus Torvalds 4341da177e4SLinus Torvalds pagevec_init(&pages_to_free, cold); 4351da177e4SLinus Torvalds for (i = 0; i < nr; i++) { 4361da177e4SLinus Torvalds struct page *page = pages[i]; 4371da177e4SLinus Torvalds 4388519fb30SNick Piggin if (unlikely(PageCompound(page))) { 4398519fb30SNick Piggin if (zone) { 440902aaed0SHisashi Hifumi spin_unlock_irqrestore(&zone->lru_lock, flags); 4418519fb30SNick Piggin zone = NULL; 4428519fb30SNick Piggin } 4438519fb30SNick Piggin put_compound_page(page); 4448519fb30SNick Piggin continue; 4458519fb30SNick Piggin } 4468519fb30SNick Piggin 447b5810039SNick Piggin if (!put_page_testzero(page)) 4481da177e4SLinus Torvalds continue; 4491da177e4SLinus Torvalds 45046453a6eSNick Piggin if (PageLRU(page)) { 45146453a6eSNick Piggin struct zone *pagezone = page_zone(page); 452894bc310SLee Schermerhorn 4531da177e4SLinus Torvalds if (pagezone != zone) { 4541da177e4SLinus Torvalds if (zone) 455902aaed0SHisashi Hifumi spin_unlock_irqrestore(&zone->lru_lock, 456902aaed0SHisashi Hifumi flags); 4571da177e4SLinus Torvalds zone = pagezone; 458902aaed0SHisashi Hifumi spin_lock_irqsave(&zone->lru_lock, flags); 4591da177e4SLinus Torvalds } 460725d704eSNick Piggin VM_BUG_ON(!PageLRU(page)); 46167453911SNick Piggin __ClearPageLRU(page); 4621da177e4SLinus Torvalds del_page_from_lru(zone, page); 46346453a6eSNick Piggin } 46446453a6eSNick Piggin 4651da177e4SLinus Torvalds if (!pagevec_add(&pages_to_free, page)) { 46646453a6eSNick Piggin if (zone) { 467902aaed0SHisashi Hifumi spin_unlock_irqrestore(&zone->lru_lock, flags); 46846453a6eSNick Piggin zone = NULL; 46946453a6eSNick Piggin } 4701da177e4SLinus Torvalds __pagevec_free(&pages_to_free); 4711da177e4SLinus Torvalds pagevec_reinit(&pages_to_free); 4721da177e4SLinus Torvalds } 4731da177e4SLinus Torvalds } 4741da177e4SLinus Torvalds if (zone) 475902aaed0SHisashi Hifumi spin_unlock_irqrestore(&zone->lru_lock, flags); 4761da177e4SLinus Torvalds 4771da177e4SLinus Torvalds pagevec_free(&pages_to_free); 4781da177e4SLinus Torvalds } 4790be8557bSMiklos Szeredi EXPORT_SYMBOL(release_pages); 4801da177e4SLinus Torvalds 4811da177e4SLinus Torvalds /* 4821da177e4SLinus Torvalds * The pages which we're about to release may be in the deferred lru-addition 4831da177e4SLinus Torvalds * queues. That would prevent them from really being freed right now. That's 4841da177e4SLinus Torvalds * OK from a correctness point of view but is inefficient - those pages may be 4851da177e4SLinus Torvalds * cache-warm and we want to give them back to the page allocator ASAP. 4861da177e4SLinus Torvalds * 4871da177e4SLinus Torvalds * So __pagevec_release() will drain those queues here. __pagevec_lru_add() 4881da177e4SLinus Torvalds * and __pagevec_lru_add_active() call release_pages() directly to avoid 4891da177e4SLinus Torvalds * mutual recursion. 4901da177e4SLinus Torvalds */ 4911da177e4SLinus Torvalds void __pagevec_release(struct pagevec *pvec) 4921da177e4SLinus Torvalds { 4931da177e4SLinus Torvalds lru_add_drain(); 4941da177e4SLinus Torvalds release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); 4951da177e4SLinus Torvalds pagevec_reinit(pvec); 4961da177e4SLinus Torvalds } 4971da177e4SLinus Torvalds 4987f285701SSteve French EXPORT_SYMBOL(__pagevec_release); 4997f285701SSteve French 50071e3aac0SAndrea Arcangeli /* used by __split_huge_page_refcount() */ 50171e3aac0SAndrea Arcangeli void lru_add_page_tail(struct zone* zone, 50271e3aac0SAndrea Arcangeli struct page *page, struct page *page_tail) 50371e3aac0SAndrea Arcangeli { 50471e3aac0SAndrea Arcangeli int active; 50571e3aac0SAndrea Arcangeli enum lru_list lru; 50671e3aac0SAndrea Arcangeli const int file = 0; 50771e3aac0SAndrea Arcangeli struct list_head *head; 50871e3aac0SAndrea Arcangeli 50971e3aac0SAndrea Arcangeli VM_BUG_ON(!PageHead(page)); 51071e3aac0SAndrea Arcangeli VM_BUG_ON(PageCompound(page_tail)); 51171e3aac0SAndrea Arcangeli VM_BUG_ON(PageLRU(page_tail)); 51271e3aac0SAndrea Arcangeli VM_BUG_ON(!spin_is_locked(&zone->lru_lock)); 51371e3aac0SAndrea Arcangeli 51471e3aac0SAndrea Arcangeli SetPageLRU(page_tail); 51571e3aac0SAndrea Arcangeli 51671e3aac0SAndrea Arcangeli if (page_evictable(page_tail, NULL)) { 51771e3aac0SAndrea Arcangeli if (PageActive(page)) { 51871e3aac0SAndrea Arcangeli SetPageActive(page_tail); 51971e3aac0SAndrea Arcangeli active = 1; 52071e3aac0SAndrea Arcangeli lru = LRU_ACTIVE_ANON; 52171e3aac0SAndrea Arcangeli } else { 52271e3aac0SAndrea Arcangeli active = 0; 52371e3aac0SAndrea Arcangeli lru = LRU_INACTIVE_ANON; 52471e3aac0SAndrea Arcangeli } 52571e3aac0SAndrea Arcangeli update_page_reclaim_stat(zone, page_tail, file, active); 52671e3aac0SAndrea Arcangeli if (likely(PageLRU(page))) 52771e3aac0SAndrea Arcangeli head = page->lru.prev; 52871e3aac0SAndrea Arcangeli else 52971e3aac0SAndrea Arcangeli head = &zone->lru[lru].list; 53071e3aac0SAndrea Arcangeli __add_page_to_lru_list(zone, page_tail, lru, head); 53171e3aac0SAndrea Arcangeli } else { 53271e3aac0SAndrea Arcangeli SetPageUnevictable(page_tail); 53371e3aac0SAndrea Arcangeli add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE); 53471e3aac0SAndrea Arcangeli } 53571e3aac0SAndrea Arcangeli } 53671e3aac0SAndrea Arcangeli 537d8505deeSShaohua Li static void ____pagevec_lru_add_fn(struct page *page, void *arg) 538d8505deeSShaohua Li { 539d8505deeSShaohua Li enum lru_list lru = (enum lru_list)arg; 540d8505deeSShaohua Li struct zone *zone = page_zone(page); 541d8505deeSShaohua Li int file = is_file_lru(lru); 542d8505deeSShaohua Li int active = is_active_lru(lru); 543d8505deeSShaohua Li 544d8505deeSShaohua Li VM_BUG_ON(PageActive(page)); 545d8505deeSShaohua Li VM_BUG_ON(PageUnevictable(page)); 546d8505deeSShaohua Li VM_BUG_ON(PageLRU(page)); 547d8505deeSShaohua Li 548d8505deeSShaohua Li SetPageLRU(page); 549d8505deeSShaohua Li if (active) 550d8505deeSShaohua Li SetPageActive(page); 551d8505deeSShaohua Li update_page_reclaim_stat(zone, page, file, active); 552d8505deeSShaohua Li add_page_to_lru_list(zone, page, lru); 553d8505deeSShaohua Li } 554d8505deeSShaohua Li 5551da177e4SLinus Torvalds /* 5561da177e4SLinus Torvalds * Add the passed pages to the LRU, then drop the caller's refcount 5571da177e4SLinus Torvalds * on them. Reinitialises the caller's pagevec. 5581da177e4SLinus Torvalds */ 559f04e9ebbSKOSAKI Motohiro void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) 5601da177e4SLinus Torvalds { 561894bc310SLee Schermerhorn VM_BUG_ON(is_unevictable_lru(lru)); 5621da177e4SLinus Torvalds 563d8505deeSShaohua Li pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru); 5641da177e4SLinus Torvalds } 5651da177e4SLinus Torvalds 566f04e9ebbSKOSAKI Motohiro EXPORT_SYMBOL(____pagevec_lru_add); 567f04e9ebbSKOSAKI Motohiro 5681da177e4SLinus Torvalds /* 5691da177e4SLinus Torvalds * Try to drop buffers from the pages in a pagevec 5701da177e4SLinus Torvalds */ 5711da177e4SLinus Torvalds void pagevec_strip(struct pagevec *pvec) 5721da177e4SLinus Torvalds { 5731da177e4SLinus Torvalds int i; 5741da177e4SLinus Torvalds 5751da177e4SLinus Torvalds for (i = 0; i < pagevec_count(pvec); i++) { 5761da177e4SLinus Torvalds struct page *page = pvec->pages[i]; 5771da177e4SLinus Torvalds 578266cf658SDavid Howells if (page_has_private(page) && trylock_page(page)) { 579266cf658SDavid Howells if (page_has_private(page)) 5801da177e4SLinus Torvalds try_to_release_page(page, 0); 5811da177e4SLinus Torvalds unlock_page(page); 5821da177e4SLinus Torvalds } 5831da177e4SLinus Torvalds } 5841da177e4SLinus Torvalds } 5851da177e4SLinus Torvalds 5861da177e4SLinus Torvalds /** 5871da177e4SLinus Torvalds * pagevec_lookup - gang pagecache lookup 5881da177e4SLinus Torvalds * @pvec: Where the resulting pages are placed 5891da177e4SLinus Torvalds * @mapping: The address_space to search 5901da177e4SLinus Torvalds * @start: The starting page index 5911da177e4SLinus Torvalds * @nr_pages: The maximum number of pages 5921da177e4SLinus Torvalds * 5931da177e4SLinus Torvalds * pagevec_lookup() will search for and return a group of up to @nr_pages pages 5941da177e4SLinus Torvalds * in the mapping. The pages are placed in @pvec. pagevec_lookup() takes a 5951da177e4SLinus Torvalds * reference against the pages in @pvec. 5961da177e4SLinus Torvalds * 5971da177e4SLinus Torvalds * The search returns a group of mapping-contiguous pages with ascending 5981da177e4SLinus Torvalds * indexes. There may be holes in the indices due to not-present pages. 5991da177e4SLinus Torvalds * 6001da177e4SLinus Torvalds * pagevec_lookup() returns the number of pages which were found. 6011da177e4SLinus Torvalds */ 6021da177e4SLinus Torvalds unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, 6031da177e4SLinus Torvalds pgoff_t start, unsigned nr_pages) 6041da177e4SLinus Torvalds { 6051da177e4SLinus Torvalds pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages); 6061da177e4SLinus Torvalds return pagevec_count(pvec); 6071da177e4SLinus Torvalds } 6081da177e4SLinus Torvalds 60978539fdfSChristoph Hellwig EXPORT_SYMBOL(pagevec_lookup); 61078539fdfSChristoph Hellwig 6111da177e4SLinus Torvalds unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, 6121da177e4SLinus Torvalds pgoff_t *index, int tag, unsigned nr_pages) 6131da177e4SLinus Torvalds { 6141da177e4SLinus Torvalds pvec->nr = find_get_pages_tag(mapping, index, tag, 6151da177e4SLinus Torvalds nr_pages, pvec->pages); 6161da177e4SLinus Torvalds return pagevec_count(pvec); 6171da177e4SLinus Torvalds } 6181da177e4SLinus Torvalds 6197f285701SSteve French EXPORT_SYMBOL(pagevec_lookup_tag); 6201da177e4SLinus Torvalds 6211da177e4SLinus Torvalds /* 6221da177e4SLinus Torvalds * Perform any setup for the swap system 6231da177e4SLinus Torvalds */ 6241da177e4SLinus Torvalds void __init swap_setup(void) 6251da177e4SLinus Torvalds { 6264481374cSJan Beulich unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); 6271da177e4SLinus Torvalds 628e0bf68ddSPeter Zijlstra #ifdef CONFIG_SWAP 629e0bf68ddSPeter Zijlstra bdi_init(swapper_space.backing_dev_info); 630e0bf68ddSPeter Zijlstra #endif 631e0bf68ddSPeter Zijlstra 6321da177e4SLinus Torvalds /* Use a smaller cluster for small-memory machines */ 6331da177e4SLinus Torvalds if (megs < 16) 6341da177e4SLinus Torvalds page_cluster = 2; 6351da177e4SLinus Torvalds else 6361da177e4SLinus Torvalds page_cluster = 3; 6371da177e4SLinus Torvalds /* 6381da177e4SLinus Torvalds * Right now other parts of the system means that we 6391da177e4SLinus Torvalds * _really_ don't want to cluster much more 6401da177e4SLinus Torvalds */ 6411da177e4SLinus Torvalds } 642