xref: /linux/mm/swap.c (revision 315601809d124d046abd6c3ffa346d0dbd7aa29d)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *  linux/mm/swap.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
51da177e4SLinus Torvalds  */
61da177e4SLinus Torvalds 
71da177e4SLinus Torvalds /*
8183ff22bSSimon Arlott  * This file contains the default values for the operation of the
91da177e4SLinus Torvalds  * Linux VM subsystem. Fine-tuning documentation can be found in
101da177e4SLinus Torvalds  * Documentation/sysctl/vm.txt.
111da177e4SLinus Torvalds  * Started 18.12.91
121da177e4SLinus Torvalds  * Swap aging added 23.2.95, Stephen Tweedie.
131da177e4SLinus Torvalds  * Buffermem limits added 12.3.98, Rik van Riel.
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
161da177e4SLinus Torvalds #include <linux/mm.h>
171da177e4SLinus Torvalds #include <linux/sched.h>
181da177e4SLinus Torvalds #include <linux/kernel_stat.h>
191da177e4SLinus Torvalds #include <linux/swap.h>
201da177e4SLinus Torvalds #include <linux/mman.h>
211da177e4SLinus Torvalds #include <linux/pagemap.h>
221da177e4SLinus Torvalds #include <linux/pagevec.h>
231da177e4SLinus Torvalds #include <linux/init.h>
241da177e4SLinus Torvalds #include <linux/module.h>
251da177e4SLinus Torvalds #include <linux/mm_inline.h>
261da177e4SLinus Torvalds #include <linux/buffer_head.h>	/* for try_to_release_page() */
271da177e4SLinus Torvalds #include <linux/percpu_counter.h>
281da177e4SLinus Torvalds #include <linux/percpu.h>
291da177e4SLinus Torvalds #include <linux/cpu.h>
301da177e4SLinus Torvalds #include <linux/notifier.h>
31e0bf68ddSPeter Zijlstra #include <linux/backing-dev.h>
3266e1707bSBalbir Singh #include <linux/memcontrol.h>
335a0e3ad6STejun Heo #include <linux/gfp.h>
341da177e4SLinus Torvalds 
3564d6519dSLee Schermerhorn #include "internal.h"
3664d6519dSLee Schermerhorn 
371da177e4SLinus Torvalds /* How many pages do we try to swap or page in/out together? */
381da177e4SLinus Torvalds int page_cluster;
391da177e4SLinus Torvalds 
40f04e9ebbSKOSAKI Motohiro static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
41f84f9504SVegard Nossum static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
42*31560180SMinchan Kim static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
43902aaed0SHisashi Hifumi 
/*
 * This path almost never happens for VM activity - pages are normally
 * freed via pagevecs.  But it gets used by networking.
 *
 * Detaches @page from its zone's LRU list (if it is on one) as the
 * final reference is dropped; the page itself is not freed here.
 */
static void __page_cache_release(struct page *page)
{
	if (PageLRU(page)) {
		unsigned long flags;
		struct zone *zone = page_zone(page);

		/* zone->lru_lock protects the LRU lists; IRQs must be off */
		spin_lock_irqsave(&zone->lru_lock, flags);
		VM_BUG_ON(!PageLRU(page));
		__ClearPageLRU(page);
		del_page_from_lru(zone, page);
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	}
}
6191807063SAndrea Arcangeli 
/* Free an order-0 page whose refcount has reached zero. */
static void __put_single_page(struct page *page)
{
	/* Detach from the LRU, then hand back to the page allocator */
	__page_cache_release(page);
	free_hot_cold_page(page, 0);
}
67b221385bSAdrian Bunk 
/* Free a compound page whose refcount has reached zero. */
static void __put_compound_page(struct page *page)
{
	compound_page_dtor *dtor;

	__page_cache_release(page);
	/* free via the page's compound destructor */
	dtor = get_compound_page_dtor(page);
	(*dtor)(page);
}
7691807063SAndrea Arcangeli 
/*
 * Drop a reference on a compound page.  Complicated by transparent
 * hugepages: __split_huge_page_refcount() can turn a tail page into a
 * regular page underneath us, so tail pages must be re-validated under
 * the head's compound_lock before refcounts are manipulated.
 */
static void put_compound_page(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/* __split_huge_page_refcount can run under us */
		struct page *page_head = page->first_page;
		smp_rmb();
		/*
		 * If PageTail is still set after smp_rmb() we can be sure
		 * that the page->first_page we read wasn't a dangling pointer.
		 * See __split_huge_page_refcount() smp_wmb().
		 */
		if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
			unsigned long flags;
			/*
			 * Verify that our page_head wasn't converted
			 * to a regular page before we got a
			 * reference on it.
			 */
			if (unlikely(!PageHead(page_head))) {
				/* PageHead is cleared after PageTail */
				smp_rmb();
				VM_BUG_ON(PageTail(page));
				goto out_put_head;
			}
			/*
			 * Only run compound_lock on a valid PageHead,
			 * after having it pinned with
			 * get_page_unless_zero() above.
			 */
			smp_mb();
			/* page_head wasn't a dangling pointer */
			flags = compound_lock_irqsave(page_head);
			if (unlikely(!PageTail(page))) {
				/* __split_huge_page_refcount run before us */
				compound_unlock_irqrestore(page_head, flags);
				VM_BUG_ON(PageHead(page_head));
			out_put_head:
				/* drop the extra reference taken on the head */
				if (put_page_testzero(page_head))
					__put_single_page(page_head);
			out_put_single:
				/* @page is a regular page now; free if last ref */
				if (put_page_testzero(page))
					__put_single_page(page);
				return;
			}
			VM_BUG_ON(page_head != page->first_page);
			/*
			 * We can release the refcount taken by
			 * get_page_unless_zero now that
			 * split_huge_page_refcount is blocked on the
			 * compound_lock.
			 */
			if (put_page_testzero(page_head))
				VM_BUG_ON(1);
			/* __split_huge_page_refcount will wait now */
			VM_BUG_ON(atomic_read(&page->_count) <= 0);
			atomic_dec(&page->_count);
			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
			compound_unlock_irqrestore(page_head, flags);
			if (put_page_testzero(page_head)) {
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
		} else {
			/* page_head is a dangling pointer */
			VM_BUG_ON(PageTail(page));
			goto out_put_single;
		}
	} else if (put_page_testzero(page)) {
		if (PageHead(page))
			__put_compound_page(page);
		else
			__put_single_page(page);
	}
}
1538519fb30SNick Piggin 
/*
 * Release a reference on @page, freeing it when the count reaches zero.
 * Compound pages take the careful path through put_compound_page().
 */
void put_page(struct page *page)
{
	if (likely(!PageCompound(page))) {
		if (put_page_testzero(page))
			__put_single_page(page);
		return;
	}
	put_compound_page(page);
}
EXPORT_SYMBOL(put_page);
1621da177e4SLinus Torvalds 
1631d7ea732SAlexander Zarochentsev /**
1647682486bSRandy Dunlap  * put_pages_list() - release a list of pages
1657682486bSRandy Dunlap  * @pages: list of pages threaded on page->lru
1661d7ea732SAlexander Zarochentsev  *
1671d7ea732SAlexander Zarochentsev  * Release a list of pages which are strung together on page.lru.  Currently
1681d7ea732SAlexander Zarochentsev  * used by read_cache_pages() and related error recovery code.
1691d7ea732SAlexander Zarochentsev  */
1701d7ea732SAlexander Zarochentsev void put_pages_list(struct list_head *pages)
1711d7ea732SAlexander Zarochentsev {
1721d7ea732SAlexander Zarochentsev 	while (!list_empty(pages)) {
1731d7ea732SAlexander Zarochentsev 		struct page *victim;
1741d7ea732SAlexander Zarochentsev 
1751d7ea732SAlexander Zarochentsev 		victim = list_entry(pages->prev, struct page, lru);
1761d7ea732SAlexander Zarochentsev 		list_del(&victim->lru);
1771d7ea732SAlexander Zarochentsev 		page_cache_release(victim);
1781d7ea732SAlexander Zarochentsev 	}
1791d7ea732SAlexander Zarochentsev }
1801d7ea732SAlexander Zarochentsev EXPORT_SYMBOL(put_pages_list);
1811d7ea732SAlexander Zarochentsev 
/*
 * pagevec_move_tail() must be called with IRQ disabled.
 * Otherwise this may cause nasty races.
 *
 * Moves each evictable, inactive LRU page in @pvec to the tail of its
 * zone's inactive list, then drops the pagevec's page references and
 * reinitialises the vector.
 */
static void pagevec_move_tail(struct pagevec *pvec)
{
	int i;
	int pgmoved = 0;
	struct zone *zone = NULL;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);

		/* Batch lock acquisition: only relock on a zone change */
		if (pagezone != zone) {
			if (zone)
				spin_unlock(&zone->lru_lock);
			zone = pagezone;
			spin_lock(&zone->lru_lock);
		}
		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
			int lru = page_lru_base_type(page);
			list_move_tail(&page->lru, &zone->lru[lru].list);
			pgmoved++;
		}
	}
	if (zone)
		spin_unlock(&zone->lru_lock);
	__count_vm_events(PGROTATED, pgmoved);
	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}
214902aaed0SHisashi Hifumi 
/*
 * Writeback is about to end against a page which has been marked for immediate
 * reclaim.  If it still appears to be reclaimable, move it to the tail of the
 * inactive list.
 *
 * The move is batched through the per-cpu lru_rotate_pvecs pagevec;
 * pagevec_move_tail() does the real LRU work once the vector fills.
 * Uses local_irq_save(), so it is safe against interrupt context.
 */
void  rotate_reclaimable_page(struct page *page)
{
	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
	    !PageUnevictable(page) && PageLRU(page)) {
		struct pagevec *pvec;
		unsigned long flags;

		/* Hold a reference while the page sits in the pagevec */
		page_cache_get(page);
		local_irq_save(flags);
		pvec = &__get_cpu_var(lru_rotate_pvecs);
		if (!pagevec_add(pvec, page))
			pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}
}
2351da177e4SLinus Torvalds 
2363e2f41f1SKOSAKI Motohiro static void update_page_reclaim_stat(struct zone *zone, struct page *page,
2373e2f41f1SKOSAKI Motohiro 				     int file, int rotated)
2383e2f41f1SKOSAKI Motohiro {
2393e2f41f1SKOSAKI Motohiro 	struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
2403e2f41f1SKOSAKI Motohiro 	struct zone_reclaim_stat *memcg_reclaim_stat;
2413e2f41f1SKOSAKI Motohiro 
2423e2f41f1SKOSAKI Motohiro 	memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
2433e2f41f1SKOSAKI Motohiro 
2443e2f41f1SKOSAKI Motohiro 	reclaim_stat->recent_scanned[file]++;
2453e2f41f1SKOSAKI Motohiro 	if (rotated)
2463e2f41f1SKOSAKI Motohiro 		reclaim_stat->recent_rotated[file]++;
2473e2f41f1SKOSAKI Motohiro 
2483e2f41f1SKOSAKI Motohiro 	if (!memcg_reclaim_stat)
2493e2f41f1SKOSAKI Motohiro 		return;
2503e2f41f1SKOSAKI Motohiro 
2513e2f41f1SKOSAKI Motohiro 	memcg_reclaim_stat->recent_scanned[file]++;
2523e2f41f1SKOSAKI Motohiro 	if (rotated)
2533e2f41f1SKOSAKI Motohiro 		memcg_reclaim_stat->recent_rotated[file]++;
2543e2f41f1SKOSAKI Motohiro }
2553e2f41f1SKOSAKI Motohiro 
/*
 * Move @page from its inactive LRU list onto the matching active list,
 * updating reclaim statistics.  No-op for pages that are off-LRU,
 * already active, or unevictable.
 *
 * FIXME: speed this up?
 */
void activate_page(struct page *page)
{
	struct zone *zone = page_zone(page);

	spin_lock_irq(&zone->lru_lock);
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		int file = page_is_file_cache(page);
		int lru = page_lru_base_type(page);
		del_page_from_lru_list(zone, page, lru);

		SetPageActive(page);
		lru += LRU_ACTIVE;
		add_page_to_lru_list(zone, page, lru);
		__count_vm_event(PGACTIVATE);

		/* rotated == 1: the page proved itself referenced */
		update_page_reclaim_stat(zone, page, file, 1);
	}
	spin_unlock_irq(&zone->lru_lock);
}
2781da177e4SLinus Torvalds 
2791da177e4SLinus Torvalds /*
2801da177e4SLinus Torvalds  * Mark a page as having seen activity.
2811da177e4SLinus Torvalds  *
2821da177e4SLinus Torvalds  * inactive,unreferenced	->	inactive,referenced
2831da177e4SLinus Torvalds  * inactive,referenced		->	active,unreferenced
2841da177e4SLinus Torvalds  * active,unreferenced		->	active,referenced
2851da177e4SLinus Torvalds  */
void mark_page_accessed(struct page *page)
{
	if (!PageReferenced(page)) {
		/* first touch: just remember it was referenced */
		SetPageReferenced(page);
	} else if (!PageActive(page) && !PageUnevictable(page) &&
			PageLRU(page)) {
		/* second touch of an inactive LRU page: promote it */
		activate_page(page);
		ClearPageReferenced(page);
	}
}

EXPORT_SYMBOL(mark_page_accessed);
2981da177e4SLinus Torvalds 
/*
 * Queue @page for addition to the @lru list via this CPU's
 * lru_add_pvecs batch; ____pagevec_lru_add() flushes the batch to the
 * zone LRU when the vector fills.  Takes a page reference which is
 * dropped when the pagevec is drained.
 */
void __lru_cache_add(struct page *page, enum lru_list lru)
{
	/* get_cpu_var disables preemption until put_cpu_var below */
	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];

	page_cache_get(page);
	if (!pagevec_add(pvec, page))
		____pagevec_lru_add(pvec, lru);
	put_cpu_var(lru_add_pvecs);
}
EXPORT_SYMBOL(__lru_cache_add);
3091da177e4SLinus Torvalds 
310f04e9ebbSKOSAKI Motohiro /**
311f04e9ebbSKOSAKI Motohiro  * lru_cache_add_lru - add a page to a page list
312f04e9ebbSKOSAKI Motohiro  * @page: the page to be added to the LRU.
313f04e9ebbSKOSAKI Motohiro  * @lru: the LRU list to which the page is added.
314f04e9ebbSKOSAKI Motohiro  */
void lru_cache_add_lru(struct page *page, enum lru_list lru)
{
	/* Clear stale state flags; the target @lru decides placement */
	if (PageActive(page)) {
		VM_BUG_ON(PageUnevictable(page));
		ClearPageActive(page);
	} else if (PageUnevictable(page)) {
		VM_BUG_ON(PageActive(page));
		ClearPageUnevictable(page);
	}

	/* The page must not already be on any LRU list */
	VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
	__lru_cache_add(page, lru);
}
3281da177e4SLinus Torvalds 
329894bc310SLee Schermerhorn /**
330894bc310SLee Schermerhorn  * add_page_to_unevictable_list - add a page to the unevictable list
331894bc310SLee Schermerhorn  * @page:  the page to be added to the unevictable list
332894bc310SLee Schermerhorn  *
333894bc310SLee Schermerhorn  * Add page directly to its zone's unevictable list.  To avoid races with
334894bc310SLee Schermerhorn  * tasks that might be making the page evictable, through eg. munlock,
335894bc310SLee Schermerhorn  * munmap or exit, while it's not on the lru, we want to add the page
336894bc310SLee Schermerhorn  * while it's locked or otherwise "invisible" to other tasks.  This is
337894bc310SLee Schermerhorn  * difficult to do when using the pagevec cache, so bypass that.
338894bc310SLee Schermerhorn  */
void add_page_to_unevictable_list(struct page *page)
{
	struct zone *zone = page_zone(page);

	/* Deliberately bypasses the pagevec cache: see comment above */
	spin_lock_irq(&zone->lru_lock);
	SetPageUnevictable(page);
	SetPageLRU(page);
	add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
	spin_unlock_irq(&zone->lru_lock);
}
349894bc310SLee Schermerhorn 
350902aaed0SHisashi Hifumi /*
351*31560180SMinchan Kim  * If the page can not be invalidated, it is moved to the
352*31560180SMinchan Kim  * inactive list to speed up its reclaim.  It is moved to the
353*31560180SMinchan Kim  * head of the list, rather than the tail, to give the flusher
354*31560180SMinchan Kim  * threads some time to write it out, as this is much more
355*31560180SMinchan Kim  * effective than the single-page writeout from reclaim.
356*31560180SMinchan Kim  */
/* Called with @zone->lru_lock held (see ____pagevec_lru_deactivate()). */
static void lru_deactivate(struct page *page, struct zone *zone)
{
	int lru, file;

	/* Only pages currently on an active LRU list are eligible */
	if (!PageLRU(page) || !PageActive(page))
		return;

	/* Some processes are using the page */
	if (page_mapped(page))
		return;

	file = page_is_file_cache(page);
	lru = page_lru_base_type(page);
	/* Demote: active list -> head of the matching inactive list */
	del_page_from_lru_list(zone, page, lru + LRU_ACTIVE);
	ClearPageActive(page);
	ClearPageReferenced(page);
	add_page_to_lru_list(zone, page, lru);
	__count_vm_event(PGDEACTIVATE);

	/* rotated == 0: the page is being demoted, not preserved */
	update_page_reclaim_stat(zone, page, file, 0);
}
378*31560180SMinchan Kim 
/*
 * Deactivate every page in @pvec, batching zone->lru_lock acquisition,
 * then drop the pagevec's page references and reinitialise it.
 */
static void ____pagevec_lru_deactivate(struct pagevec *pvec)
{
	int i;
	struct zone *zone = NULL;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);

		/* Only relock when the zone changes */
		if (pagezone != zone) {
			if (zone)
				spin_unlock_irq(&zone->lru_lock);
			zone = pagezone;
			spin_lock_irq(&zone->lru_lock);
		}
		lru_deactivate(page, zone);
	}
	if (zone)
		spin_unlock_irq(&zone->lru_lock);

	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}
402*31560180SMinchan Kim 
403*31560180SMinchan Kim 
404*31560180SMinchan Kim /*
405902aaed0SHisashi Hifumi  * Drain pages out of the cpu's pagevecs.
406902aaed0SHisashi Hifumi  * Either "cpu" is the current CPU, and preemption has already been
407902aaed0SHisashi Hifumi  * disabled; or "cpu" is being hot-unplugged, and is already dead.
408902aaed0SHisashi Hifumi  */
static void drain_cpu_pagevecs(int cpu)
{
	struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
	struct pagevec *pvec;
	int lru;

	/* Flush the deferred lru-add batch for every LRU list */
	for_each_lru(lru) {
		pvec = &pvecs[lru - LRU_BASE];
		if (pagevec_count(pvec))
			____pagevec_lru_add(pvec, lru);
	}

	/* Flush pages queued for rotation to the inactive tail */
	pvec = &per_cpu(lru_rotate_pvecs, cpu);
	if (pagevec_count(pvec)) {
		unsigned long flags;

		/* No harm done if a racing interrupt already did this */
		local_irq_save(flags);
		pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}

	/* Flush pages queued for deactivation */
	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
	if (pagevec_count(pvec))
		____pagevec_lru_deactivate(pvec);
}
435*31560180SMinchan Kim 
436*31560180SMinchan Kim /**
437*31560180SMinchan Kim  * deactivate_page - forcefully deactivate a page
438*31560180SMinchan Kim  * @page: page to deactivate
439*31560180SMinchan Kim  *
440*31560180SMinchan Kim  * This function hints the VM that @page is a good reclaim candidate,
441*31560180SMinchan Kim  * for example if its invalidation fails due to the page being dirty
442*31560180SMinchan Kim  * or under writeback.
443*31560180SMinchan Kim  */
void deactivate_page(struct page *page)
{
	/*
	 * get_page_unless_zero() guards against racing with the final
	 * put_page(); pages already at refcount zero are skipped.
	 */
	if (likely(get_page_unless_zero(page))) {
		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);

		if (!pagevec_add(pvec, page))
			____pagevec_lru_deactivate(pvec);
		put_cpu_var(lru_deactivate_pvecs);
	}
}
45480bfed90SAndrew Morton 
/* Drain this CPU's pagevecs; preemption is disabled while draining. */
void lru_add_drain(void)
{
	int cpu = get_cpu();

	drain_cpu_pagevecs(cpu);
	put_cpu();
}
4601da177e4SLinus Torvalds 
/* Workqueue callback: drain the pagevecs of the CPU running the work. */
static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
	lru_add_drain();
}
465053837fcSNick Piggin 
466053837fcSNick Piggin /*
467053837fcSNick Piggin  * Returns 0 for success
468053837fcSNick Piggin  */
int lru_add_drain_all(void)
{
	/* Run lru_add_drain() on every online CPU; may sleep */
	return schedule_on_each_cpu(lru_add_drain_per_cpu);
}
473053837fcSNick Piggin 
4741da177e4SLinus Torvalds /*
4751da177e4SLinus Torvalds  * Batched page_cache_release().  Decrement the reference count on all the
4761da177e4SLinus Torvalds  * passed pages.  If it fell to zero then remove the page from the LRU and
4771da177e4SLinus Torvalds  * free it.
4781da177e4SLinus Torvalds  *
4791da177e4SLinus Torvalds  * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
4801da177e4SLinus Torvalds  * for the remainder of the operation.
4811da177e4SLinus Torvalds  *
482ab33dc09SFernando Luis Vazquez Cao  * The locking in this function is against shrink_inactive_list(): we recheck
483ab33dc09SFernando Luis Vazquez Cao  * the page count inside the lock to see whether shrink_inactive_list()
484ab33dc09SFernando Luis Vazquez Cao  * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
485ab33dc09SFernando Luis Vazquez Cao  * will free it.
4861da177e4SLinus Torvalds  */
void release_pages(struct page **pages, int nr, int cold)
{
	int i;
	struct pagevec pages_to_free;	/* batch of pages ready for freeing */
	struct zone *zone = NULL;
	unsigned long uninitialized_var(flags);

	pagevec_init(&pages_to_free, cold);
	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		/* Compound pages go through put_compound_page(), unlocked */
		if (unlikely(PageCompound(page))) {
			if (zone) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				zone = NULL;
			}
			put_compound_page(page);
			continue;
		}

		if (!put_page_testzero(page))
			continue;

		/* Refcount hit zero: pull the page off its LRU list */
		if (PageLRU(page)) {
			struct zone *pagezone = page_zone(page);

			/* Only relock when the zone changes */
			if (pagezone != zone) {
				if (zone)
					spin_unlock_irqrestore(&zone->lru_lock,
									flags);
				zone = pagezone;
				spin_lock_irqsave(&zone->lru_lock, flags);
			}
			VM_BUG_ON(!PageLRU(page));
			__ClearPageLRU(page);
			del_page_from_lru(zone, page);
		}

		/* Free in batches; drop the lock across __pagevec_free() */
		if (!pagevec_add(&pages_to_free, page)) {
			if (zone) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				zone = NULL;
			}
			__pagevec_free(&pages_to_free);
			pagevec_reinit(&pages_to_free);
		}
	}
	if (zone)
		spin_unlock_irqrestore(&zone->lru_lock, flags);

	/* Free whatever remains in the final partial batch */
	pagevec_free(&pages_to_free);
}
EXPORT_SYMBOL(release_pages);
5401da177e4SLinus Torvalds 
5411da177e4SLinus Torvalds /*
5421da177e4SLinus Torvalds  * The pages which we're about to release may be in the deferred lru-addition
5431da177e4SLinus Torvalds  * queues.  That would prevent them from really being freed right now.  That's
5441da177e4SLinus Torvalds  * OK from a correctness point of view but is inefficient - those pages may be
5451da177e4SLinus Torvalds  * cache-warm and we want to give them back to the page allocator ASAP.
5461da177e4SLinus Torvalds  *
5471da177e4SLinus Torvalds  * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
5481da177e4SLinus Torvalds  * and __pagevec_lru_add_active() call release_pages() directly to avoid
5491da177e4SLinus Torvalds  * mutual recursion.
5501da177e4SLinus Torvalds  */
void __pagevec_release(struct pagevec *pvec)
{
	/* Flush deferred lru-add queues so the pages can really be freed */
	lru_add_drain();
	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
	pagevec_reinit(pvec);
}

EXPORT_SYMBOL(__pagevec_release);
5597f285701SSteve French 
56071e3aac0SAndrea Arcangeli /* used by __split_huge_page_refcount() */
void lru_add_page_tail(struct zone* zone,
		       struct page *page, struct page *page_tail)
{
	int active;
	enum lru_list lru;
	const int file = 0;	/* tails are placed on anon LRU lists below */
	struct list_head *head;

	VM_BUG_ON(!PageHead(page));
	VM_BUG_ON(PageCompound(page_tail));
	VM_BUG_ON(PageLRU(page_tail));
	VM_BUG_ON(!spin_is_locked(&zone->lru_lock));

	SetPageLRU(page_tail);

	if (page_evictable(page_tail, NULL)) {
		/* Inherit the head page's active state */
		if (PageActive(page)) {
			SetPageActive(page_tail);
			active = 1;
			lru = LRU_ACTIVE_ANON;
		} else {
			active = 0;
			lru = LRU_INACTIVE_ANON;
		}
		update_page_reclaim_stat(zone, page_tail, file, active);
		/* Insert the tail next to the head when the head is on-LRU */
		if (likely(PageLRU(page)))
			head = page->lru.prev;
		else
			head = &zone->lru[lru].list;
		__add_page_to_lru_list(zone, page_tail, lru, head);
	} else {
		SetPageUnevictable(page_tail);
		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
	}
}
59671e3aac0SAndrea Arcangeli 
5971da177e4SLinus Torvalds /*
5981da177e4SLinus Torvalds  * Add the passed pages to the LRU, then drop the caller's refcount
5991da177e4SLinus Torvalds  * on them.  Reinitialises the caller's pagevec.
6001da177e4SLinus Torvalds  */
void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
{
	int i;
	struct zone *zone = NULL;

	/* Unevictable pages go via add_page_to_unevictable_list() */
	VM_BUG_ON(is_unevictable_lru(lru));

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);
		int file;
		int active;

		/* Only relock when the zone changes */
		if (pagezone != zone) {
			if (zone)
				spin_unlock_irq(&zone->lru_lock);
			zone = pagezone;
			spin_lock_irq(&zone->lru_lock);
		}
		VM_BUG_ON(PageActive(page));
		VM_BUG_ON(PageUnevictable(page));
		VM_BUG_ON(PageLRU(page));
		SetPageLRU(page);
		active = is_active_lru(lru);
		file = is_file_lru(lru);
		if (active)
			SetPageActive(page);
		update_page_reclaim_stat(zone, page, file, active);
		add_page_to_lru_list(zone, page, lru);
	}
	if (zone)
		spin_unlock_irq(&zone->lru_lock);
	/* Drop the references taken when the pages were queued */
	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}

EXPORT_SYMBOL(____pagevec_lru_add);
638f04e9ebbSKOSAKI Motohiro 
6391da177e4SLinus Torvalds /*
6401da177e4SLinus Torvalds  * Try to drop buffers from the pages in a pagevec
6411da177e4SLinus Torvalds  */
6421da177e4SLinus Torvalds void pagevec_strip(struct pagevec *pvec)
6431da177e4SLinus Torvalds {
6441da177e4SLinus Torvalds 	int i;
6451da177e4SLinus Torvalds 
6461da177e4SLinus Torvalds 	for (i = 0; i < pagevec_count(pvec); i++) {
6471da177e4SLinus Torvalds 		struct page *page = pvec->pages[i];
6481da177e4SLinus Torvalds 
649266cf658SDavid Howells 		if (page_has_private(page) && trylock_page(page)) {
650266cf658SDavid Howells 			if (page_has_private(page))
6511da177e4SLinus Torvalds 				try_to_release_page(page, 0);
6521da177e4SLinus Torvalds 			unlock_page(page);
6531da177e4SLinus Torvalds 		}
6541da177e4SLinus Torvalds 	}
6551da177e4SLinus Torvalds }
6561da177e4SLinus Torvalds 
6571da177e4SLinus Torvalds /**
6581da177e4SLinus Torvalds  * pagevec_lookup - gang pagecache lookup
6591da177e4SLinus Torvalds  * @pvec:	Where the resulting pages are placed
6601da177e4SLinus Torvalds  * @mapping:	The address_space to search
6611da177e4SLinus Torvalds  * @start:	The starting page index
6621da177e4SLinus Torvalds  * @nr_pages:	The maximum number of pages
6631da177e4SLinus Torvalds  *
6641da177e4SLinus Torvalds  * pagevec_lookup() will search for and return a group of up to @nr_pages pages
6651da177e4SLinus Torvalds  * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
6661da177e4SLinus Torvalds  * reference against the pages in @pvec.
6671da177e4SLinus Torvalds  *
6681da177e4SLinus Torvalds  * The search returns a group of mapping-contiguous pages with ascending
6691da177e4SLinus Torvalds  * indexes.  There may be holes in the indices due to not-present pages.
6701da177e4SLinus Torvalds  *
6711da177e4SLinus Torvalds  * pagevec_lookup() returns the number of pages which were found.
6721da177e4SLinus Torvalds  */
6731da177e4SLinus Torvalds unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
6741da177e4SLinus Torvalds 		pgoff_t start, unsigned nr_pages)
6751da177e4SLinus Torvalds {
6761da177e4SLinus Torvalds 	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
6771da177e4SLinus Torvalds 	return pagevec_count(pvec);
6781da177e4SLinus Torvalds }
6791da177e4SLinus Torvalds 
68078539fdfSChristoph Hellwig EXPORT_SYMBOL(pagevec_lookup);
68178539fdfSChristoph Hellwig 
6821da177e4SLinus Torvalds unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
6831da177e4SLinus Torvalds 		pgoff_t *index, int tag, unsigned nr_pages)
6841da177e4SLinus Torvalds {
6851da177e4SLinus Torvalds 	pvec->nr = find_get_pages_tag(mapping, index, tag,
6861da177e4SLinus Torvalds 					nr_pages, pvec->pages);
6871da177e4SLinus Torvalds 	return pagevec_count(pvec);
6881da177e4SLinus Torvalds }
6891da177e4SLinus Torvalds 
6907f285701SSteve French EXPORT_SYMBOL(pagevec_lookup_tag);
6911da177e4SLinus Torvalds 
6921da177e4SLinus Torvalds /*
6931da177e4SLinus Torvalds  * Perform any setup for the swap system
6941da177e4SLinus Torvalds  */
6951da177e4SLinus Torvalds void __init swap_setup(void)
6961da177e4SLinus Torvalds {
6974481374cSJan Beulich 	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
6981da177e4SLinus Torvalds 
699e0bf68ddSPeter Zijlstra #ifdef CONFIG_SWAP
700e0bf68ddSPeter Zijlstra 	bdi_init(swapper_space.backing_dev_info);
701e0bf68ddSPeter Zijlstra #endif
702e0bf68ddSPeter Zijlstra 
7031da177e4SLinus Torvalds 	/* Use a smaller cluster for small-memory machines */
7041da177e4SLinus Torvalds 	if (megs < 16)
7051da177e4SLinus Torvalds 		page_cluster = 2;
7061da177e4SLinus Torvalds 	else
7071da177e4SLinus Torvalds 		page_cluster = 3;
7081da177e4SLinus Torvalds 	/*
7091da177e4SLinus Torvalds 	 * Right now other parts of the system means that we
7101da177e4SLinus Torvalds 	 * _really_ don't want to cluster much more
7111da177e4SLinus Torvalds 	 */
7121da177e4SLinus Torvalds }
713