xref: /linux/mm/swap.c (revision 059285a25f30c13ed4f5d91cecd6094b9b20bb7b)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *  linux/mm/swap.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
51da177e4SLinus Torvalds  */
61da177e4SLinus Torvalds 
71da177e4SLinus Torvalds /*
8183ff22bSSimon Arlott  * This file contains the default values for the operation of the
91da177e4SLinus Torvalds  * Linux VM subsystem. Fine-tuning documentation can be found in
101da177e4SLinus Torvalds  * Documentation/sysctl/vm.txt.
111da177e4SLinus Torvalds  * Started 18.12.91
121da177e4SLinus Torvalds  * Swap aging added 23.2.95, Stephen Tweedie.
131da177e4SLinus Torvalds  * Buffermem limits added 12.3.98, Rik van Riel.
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
161da177e4SLinus Torvalds #include <linux/mm.h>
171da177e4SLinus Torvalds #include <linux/sched.h>
181da177e4SLinus Torvalds #include <linux/kernel_stat.h>
191da177e4SLinus Torvalds #include <linux/swap.h>
201da177e4SLinus Torvalds #include <linux/mman.h>
211da177e4SLinus Torvalds #include <linux/pagemap.h>
221da177e4SLinus Torvalds #include <linux/pagevec.h>
231da177e4SLinus Torvalds #include <linux/init.h>
24b95f1b31SPaul Gortmaker #include <linux/export.h>
251da177e4SLinus Torvalds #include <linux/mm_inline.h>
261da177e4SLinus Torvalds #include <linux/percpu_counter.h>
271da177e4SLinus Torvalds #include <linux/percpu.h>
281da177e4SLinus Torvalds #include <linux/cpu.h>
291da177e4SLinus Torvalds #include <linux/notifier.h>
30e0bf68ddSPeter Zijlstra #include <linux/backing-dev.h>
3166e1707bSBalbir Singh #include <linux/memcontrol.h>
325a0e3ad6STejun Heo #include <linux/gfp.h>
33a27bb332SKent Overstreet #include <linux/uio.h>
341da177e4SLinus Torvalds 
3564d6519dSLee Schermerhorn #include "internal.h"
3664d6519dSLee Schermerhorn 
37c6286c98SMel Gorman #define CREATE_TRACE_POINTS
38c6286c98SMel Gorman #include <trace/events/pagemap.h>
39c6286c98SMel Gorman 
401da177e4SLinus Torvalds /* How many pages do we try to swap or page in/out together? */
411da177e4SLinus Torvalds int page_cluster;
421da177e4SLinus Torvalds 
4313f7f789SMel Gorman static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
44f84f9504SVegard Nossum static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
4531560180SMinchan Kim static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
46902aaed0SHisashi Hifumi 
47b221385bSAdrian Bunk /*
48b221385bSAdrian Bunk  * This path almost never happens for VM activity - pages are normally
49b221385bSAdrian Bunk  * freed via pagevecs.  But it gets used by networking.
50b221385bSAdrian Bunk  */
51920c7a5dSHarvey Harrison static void __page_cache_release(struct page *page)
52b221385bSAdrian Bunk {
53b221385bSAdrian Bunk 	if (PageLRU(page)) {
54b221385bSAdrian Bunk 		struct zone *zone = page_zone(page);
55fa9add64SHugh Dickins 		struct lruvec *lruvec;
56fa9add64SHugh Dickins 		unsigned long flags;
57b221385bSAdrian Bunk 
58b221385bSAdrian Bunk 		spin_lock_irqsave(&zone->lru_lock, flags);
59fa9add64SHugh Dickins 		lruvec = mem_cgroup_page_lruvec(page, zone);
60b221385bSAdrian Bunk 		VM_BUG_ON(!PageLRU(page));
61b221385bSAdrian Bunk 		__ClearPageLRU(page);
62fa9add64SHugh Dickins 		del_page_from_lru_list(page, lruvec, page_off_lru(page));
63b221385bSAdrian Bunk 		spin_unlock_irqrestore(&zone->lru_lock, flags);
64b221385bSAdrian Bunk 	}
6591807063SAndrea Arcangeli }
6691807063SAndrea Arcangeli 
6791807063SAndrea Arcangeli static void __put_single_page(struct page *page)
6891807063SAndrea Arcangeli {
6991807063SAndrea Arcangeli 	__page_cache_release(page);
70fc91668eSLi Hong 	free_hot_cold_page(page, 0);
71b221385bSAdrian Bunk }
72b221385bSAdrian Bunk 
7391807063SAndrea Arcangeli static void __put_compound_page(struct page *page)
7491807063SAndrea Arcangeli {
7591807063SAndrea Arcangeli 	compound_page_dtor *dtor;
7691807063SAndrea Arcangeli 
7791807063SAndrea Arcangeli 	__page_cache_release(page);
7891807063SAndrea Arcangeli 	dtor = get_compound_page_dtor(page);
7991807063SAndrea Arcangeli 	(*dtor)(page);
8091807063SAndrea Arcangeli }
8191807063SAndrea Arcangeli 
828519fb30SNick Piggin static void put_compound_page(struct page *page)
831da177e4SLinus Torvalds {
8491807063SAndrea Arcangeli 	if (unlikely(PageTail(page))) {
8591807063SAndrea Arcangeli 		/* __split_huge_page_refcount can run under us */
8670b50f94SAndrea Arcangeli 		struct page *page_head = compound_trans_head(page);
8770b50f94SAndrea Arcangeli 
8870b50f94SAndrea Arcangeli 		if (likely(page != page_head &&
8970b50f94SAndrea Arcangeli 			   get_page_unless_zero(page_head))) {
9091807063SAndrea Arcangeli 			unsigned long flags;
915bf5f03cSPravin B Shelar 
925bf5f03cSPravin B Shelar 			/*
935bf5f03cSPravin B Shelar 			 * THP can not break up slab pages so avoid taking
945bf5f03cSPravin B Shelar 			 * compound_lock().  Slab performs non-atomic bit ops
955bf5f03cSPravin B Shelar 			 * on page->flags for better performance.  In particular
965bf5f03cSPravin B Shelar 			 * slab_unlock() in slub used to be a hot path.  It is
975bf5f03cSPravin B Shelar 			 * still hot on arches that do not support
985bf5f03cSPravin B Shelar 			 * this_cpu_cmpxchg_double().
995bf5f03cSPravin B Shelar 			 */
1005bf5f03cSPravin B Shelar 			if (PageSlab(page_head)) {
1015bf5f03cSPravin B Shelar 				if (PageTail(page)) {
1025bf5f03cSPravin B Shelar 					if (put_page_testzero(page_head))
1035bf5f03cSPravin B Shelar 						VM_BUG_ON(1);
1045bf5f03cSPravin B Shelar 
1055bf5f03cSPravin B Shelar 					atomic_dec(&page->_mapcount);
1065bf5f03cSPravin B Shelar 					goto skip_lock_tail;
1075bf5f03cSPravin B Shelar 				} else
1085bf5f03cSPravin B Shelar 					goto skip_lock;
1095bf5f03cSPravin B Shelar 			}
11091807063SAndrea Arcangeli 			/*
11170b50f94SAndrea Arcangeli 			 * page_head wasn't a dangling pointer but it
11270b50f94SAndrea Arcangeli 			 * may not be a head page anymore by the time
11370b50f94SAndrea Arcangeli 			 * we obtain the lock. That is ok as long as it
11470b50f94SAndrea Arcangeli 			 * can't be freed from under us.
11591807063SAndrea Arcangeli 			 */
11691807063SAndrea Arcangeli 			flags = compound_lock_irqsave(page_head);
11791807063SAndrea Arcangeli 			if (unlikely(!PageTail(page))) {
11891807063SAndrea Arcangeli 				/* __split_huge_page_refcount run before us */
11991807063SAndrea Arcangeli 				compound_unlock_irqrestore(page_head, flags);
1205bf5f03cSPravin B Shelar skip_lock:
12191807063SAndrea Arcangeli 				if (put_page_testzero(page_head))
12291807063SAndrea Arcangeli 					__put_single_page(page_head);
12391807063SAndrea Arcangeli out_put_single:
12491807063SAndrea Arcangeli 				if (put_page_testzero(page))
12591807063SAndrea Arcangeli 					__put_single_page(page);
12691807063SAndrea Arcangeli 				return;
12791807063SAndrea Arcangeli 			}
12891807063SAndrea Arcangeli 			VM_BUG_ON(page_head != page->first_page);
12991807063SAndrea Arcangeli 			/*
13091807063SAndrea Arcangeli 			 * We can release the refcount taken by
13170b50f94SAndrea Arcangeli 			 * get_page_unless_zero() now that
13270b50f94SAndrea Arcangeli 			 * __split_huge_page_refcount() is blocked on
13370b50f94SAndrea Arcangeli 			 * the compound_lock.
13491807063SAndrea Arcangeli 			 */
13591807063SAndrea Arcangeli 			if (put_page_testzero(page_head))
13691807063SAndrea Arcangeli 				VM_BUG_ON(1);
13791807063SAndrea Arcangeli 			/* __split_huge_page_refcount will wait now */
13870b50f94SAndrea Arcangeli 			VM_BUG_ON(page_mapcount(page) <= 0);
13970b50f94SAndrea Arcangeli 			atomic_dec(&page->_mapcount);
14091807063SAndrea Arcangeli 			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
14170b50f94SAndrea Arcangeli 			VM_BUG_ON(atomic_read(&page->_count) != 0);
14291807063SAndrea Arcangeli 			compound_unlock_irqrestore(page_head, flags);
1435bf5f03cSPravin B Shelar 
1445bf5f03cSPravin B Shelar skip_lock_tail:
145a95a82e9SAndrea Arcangeli 			if (put_page_testzero(page_head)) {
146a95a82e9SAndrea Arcangeli 				if (PageHead(page_head))
14791807063SAndrea Arcangeli 					__put_compound_page(page_head);
148a95a82e9SAndrea Arcangeli 				else
149a95a82e9SAndrea Arcangeli 					__put_single_page(page_head);
150a95a82e9SAndrea Arcangeli 			}
15191807063SAndrea Arcangeli 		} else {
15291807063SAndrea Arcangeli 			/* page_head is a dangling pointer */
15391807063SAndrea Arcangeli 			VM_BUG_ON(PageTail(page));
15491807063SAndrea Arcangeli 			goto out_put_single;
15591807063SAndrea Arcangeli 		}
15691807063SAndrea Arcangeli 	} else if (put_page_testzero(page)) {
15791807063SAndrea Arcangeli 		if (PageHead(page))
15891807063SAndrea Arcangeli 			__put_compound_page(page);
15991807063SAndrea Arcangeli 		else
16091807063SAndrea Arcangeli 			__put_single_page(page);
1611da177e4SLinus Torvalds 	}
1621da177e4SLinus Torvalds }
1638519fb30SNick Piggin 
1648519fb30SNick Piggin void put_page(struct page *page)
1658519fb30SNick Piggin {
1668519fb30SNick Piggin 	if (unlikely(PageCompound(page)))
1678519fb30SNick Piggin 		put_compound_page(page);
1688519fb30SNick Piggin 	else if (put_page_testzero(page))
16991807063SAndrea Arcangeli 		__put_single_page(page);
1701da177e4SLinus Torvalds }
1711da177e4SLinus Torvalds EXPORT_SYMBOL(put_page);
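
/*
 * Illustrative sketch, not part of the original file: the usual pairing of
 * get_page()/put_page() around temporary use of a page the caller does not
 * otherwise own.  The helper name is hypothetical.
 */
static inline void example_put_page_pairing(struct page *page)
{
	get_page(page);		/* take an extra reference for the duration */
	/* ... access the page contents, pass the page around, etc. ... */
	put_page(page);		/* drop it; the final put frees the page */
}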
1721da177e4SLinus Torvalds 
17370b50f94SAndrea Arcangeli /*
17470b50f94SAndrea Arcangeli  * This function is exported but must not be called by anything other
17570b50f94SAndrea Arcangeli  * than get_page(). It implements the slow path of get_page().
17670b50f94SAndrea Arcangeli  */
17770b50f94SAndrea Arcangeli bool __get_page_tail(struct page *page)
17870b50f94SAndrea Arcangeli {
17970b50f94SAndrea Arcangeli 	/*
18070b50f94SAndrea Arcangeli 	 * This takes care of get_page() if run on a tail page
18170b50f94SAndrea Arcangeli 	 * returned by one of the get_user_pages/follow_page variants.
18270b50f94SAndrea Arcangeli 	 * get_user_pages/follow_page itself doesn't need the compound
18370b50f94SAndrea Arcangeli 	 * lock because it runs __get_page_tail_foll() under the
18470b50f94SAndrea Arcangeli 	 * proper PT lock that already serializes against
18570b50f94SAndrea Arcangeli 	 * split_huge_page().
18670b50f94SAndrea Arcangeli 	 */
18770b50f94SAndrea Arcangeli 	unsigned long flags;
18870b50f94SAndrea Arcangeli 	bool got = false;
18970b50f94SAndrea Arcangeli 	struct page *page_head = compound_trans_head(page);
19070b50f94SAndrea Arcangeli 
19170b50f94SAndrea Arcangeli 	if (likely(page != page_head && get_page_unless_zero(page_head))) {
1925bf5f03cSPravin B Shelar 
1935bf5f03cSPravin B Shelar 		/* See the corresponding comment in put_compound_page(). */
1945bf5f03cSPravin B Shelar 		if (PageSlab(page_head)) {
1955bf5f03cSPravin B Shelar 			if (likely(PageTail(page))) {
1965bf5f03cSPravin B Shelar 				__get_page_tail_foll(page, false);
1975bf5f03cSPravin B Shelar 				return true;
1985bf5f03cSPravin B Shelar 			} else {
1995bf5f03cSPravin B Shelar 				put_page(page_head);
2005bf5f03cSPravin B Shelar 				return false;
2015bf5f03cSPravin B Shelar 			}
2025bf5f03cSPravin B Shelar 		}
2035bf5f03cSPravin B Shelar 
20470b50f94SAndrea Arcangeli 		/*
20570b50f94SAndrea Arcangeli 		 * page_head wasn't a dangling pointer but it
20670b50f94SAndrea Arcangeli 		 * may not be a head page anymore by the time
20770b50f94SAndrea Arcangeli 		 * we obtain the lock. That is ok as long as it
20870b50f94SAndrea Arcangeli 		 * can't be freed from under us.
20970b50f94SAndrea Arcangeli 		 */
21070b50f94SAndrea Arcangeli 		flags = compound_lock_irqsave(page_head);
21170b50f94SAndrea Arcangeli 		/* here __split_huge_page_refcount won't run anymore */
21270b50f94SAndrea Arcangeli 		if (likely(PageTail(page))) {
21370b50f94SAndrea Arcangeli 			__get_page_tail_foll(page, false);
21470b50f94SAndrea Arcangeli 			got = true;
21570b50f94SAndrea Arcangeli 		}
21670b50f94SAndrea Arcangeli 		compound_unlock_irqrestore(page_head, flags);
21770b50f94SAndrea Arcangeli 		if (unlikely(!got))
21870b50f94SAndrea Arcangeli 			put_page(page_head);
21970b50f94SAndrea Arcangeli 	}
22070b50f94SAndrea Arcangeli 	return got;
22170b50f94SAndrea Arcangeli }
22270b50f94SAndrea Arcangeli EXPORT_SYMBOL(__get_page_tail);
22370b50f94SAndrea Arcangeli 
2241d7ea732SAlexander Zarochentsev /**
2257682486bSRandy Dunlap  * put_pages_list() - release a list of pages
2267682486bSRandy Dunlap  * @pages: list of pages threaded on page->lru
2271d7ea732SAlexander Zarochentsev  *
2281d7ea732SAlexander Zarochentsev  * Release a list of pages which are strung together on page->lru.  Currently
2291d7ea732SAlexander Zarochentsev  * used by read_cache_pages() and related error recovery code.
2301d7ea732SAlexander Zarochentsev  */
2311d7ea732SAlexander Zarochentsev void put_pages_list(struct list_head *pages)
2321d7ea732SAlexander Zarochentsev {
2331d7ea732SAlexander Zarochentsev 	while (!list_empty(pages)) {
2341d7ea732SAlexander Zarochentsev 		struct page *victim;
2351d7ea732SAlexander Zarochentsev 
2361d7ea732SAlexander Zarochentsev 		victim = list_entry(pages->prev, struct page, lru);
2371d7ea732SAlexander Zarochentsev 		list_del(&victim->lru);
2381d7ea732SAlexander Zarochentsev 		page_cache_release(victim);
2391d7ea732SAlexander Zarochentsev 	}
2401d7ea732SAlexander Zarochentsev }
2411d7ea732SAlexander Zarochentsev EXPORT_SYMBOL(put_pages_list);
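
/*
 * Illustrative sketch, not part of the original file: collecting pages whose
 * ->lru field is free (i.e. pages not on the LRU lists) onto a private list
 * and releasing them in one call.  The helper name is hypothetical.
 */
static inline void example_put_pages_list(struct page **pages, int nr)
{
	LIST_HEAD(list);
	int i;

	for (i = 0; i < nr; i++)
		list_add_tail(&pages[i]->lru, &list);
	put_pages_list(&list);		/* drops one reference per page */
}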
2421d7ea732SAlexander Zarochentsev 
24318022c5dSMel Gorman /**
24418022c5dSMel Gorman  * get_kernel_pages() - pin kernel pages in memory
24518022c5dSMel Gorman  * @kiov:	An array of struct kvec structures
24618022c5dSMel Gorman  * @nr_segs:	number of segments to pin
24718022c5dSMel Gorman  * @write:	pinning for read/write, currently ignored
24818022c5dSMel Gorman  * @pages:	array that receives pointers to the pages pinned.
24918022c5dSMel Gorman  *		Should be at least nr_segs long.
25018022c5dSMel Gorman  *
25118022c5dSMel Gorman  * Returns the number of pages pinned. This may be fewer than the number
25218022c5dSMel Gorman  * requested. If nr_segs is 0 or negative, returns 0. If no pages
25318022c5dSMel Gorman  * were pinned, returns -errno. Each page returned must be released
25418022c5dSMel Gorman  * with a put_page() call when it is finished with.
25518022c5dSMel Gorman  */
25618022c5dSMel Gorman int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
25718022c5dSMel Gorman 		struct page **pages)
25818022c5dSMel Gorman {
25918022c5dSMel Gorman 	int seg;
26018022c5dSMel Gorman 
26118022c5dSMel Gorman 	for (seg = 0; seg < nr_segs; seg++) {
26218022c5dSMel Gorman 		if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
26318022c5dSMel Gorman 			return seg;
26418022c5dSMel Gorman 
2655a178119SMel Gorman 		pages[seg] = kmap_to_page(kiov[seg].iov_base);
26618022c5dSMel Gorman 		page_cache_get(pages[seg]);
26718022c5dSMel Gorman 	}
26818022c5dSMel Gorman 
26918022c5dSMel Gorman 	return seg;
27018022c5dSMel Gorman }
27118022c5dSMel Gorman EXPORT_SYMBOL_GPL(get_kernel_pages);
27218022c5dSMel Gorman 
27318022c5dSMel Gorman /**
27418022c5dSMel Gorman  * get_kernel_page() - pin a kernel page in memory
27518022c5dSMel Gorman  * @start:	starting kernel address
27618022c5dSMel Gorman  * @write:	pinning for read/write, currently ignored
27718022c5dSMel Gorman  * @pages:	array that receives pointer to the page pinned.
27818022c5dSMel Gorman  *		Must be at least 1 entry long.
27918022c5dSMel Gorman  *
28018022c5dSMel Gorman  * Returns 1 if page is pinned. If the page was not pinned, returns
28118022c5dSMel Gorman  * -errno. The page returned must be released with a put_page() call
28218022c5dSMel Gorman  * when it is finished with.
28318022c5dSMel Gorman  */
28418022c5dSMel Gorman int get_kernel_page(unsigned long start, int write, struct page **pages)
28518022c5dSMel Gorman {
28618022c5dSMel Gorman 	const struct kvec kiov = {
28718022c5dSMel Gorman 		.iov_base = (void *)start,
28818022c5dSMel Gorman 		.iov_len = PAGE_SIZE
28918022c5dSMel Gorman 	};
29018022c5dSMel Gorman 
29118022c5dSMel Gorman 	return get_kernel_pages(&kiov, 1, write, pages);
29218022c5dSMel Gorman }
29318022c5dSMel Gorman EXPORT_SYMBOL_GPL(get_kernel_page);
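
/*
 * Illustrative sketch, not part of the original file: pinning the single page
 * backing a page-aligned kernel buffer and dropping the pin again.  The
 * buffer and helper names are hypothetical.
 */
static inline void example_pin_kernel_buffer(void *buf)
{
	struct page *page;

	/* buf must be page aligned; exactly one page is pinned */
	if (get_kernel_page((unsigned long)buf, 0, &page) == 1) {
		/* ... e.g. hand the page to the block layer for I/O ... */
		put_page(page);	/* release the reference taken by the pin */
	}
}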
29418022c5dSMel Gorman 
2953dd7ae8eSShaohua Li static void pagevec_lru_move_fn(struct pagevec *pvec,
296fa9add64SHugh Dickins 	void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
2973dd7ae8eSShaohua Li 	void *arg)
298902aaed0SHisashi Hifumi {
299902aaed0SHisashi Hifumi 	int i;
300902aaed0SHisashi Hifumi 	struct zone *zone = NULL;
301fa9add64SHugh Dickins 	struct lruvec *lruvec;
3023dd7ae8eSShaohua Li 	unsigned long flags = 0;
303902aaed0SHisashi Hifumi 
304902aaed0SHisashi Hifumi 	for (i = 0; i < pagevec_count(pvec); i++) {
305902aaed0SHisashi Hifumi 		struct page *page = pvec->pages[i];
306902aaed0SHisashi Hifumi 		struct zone *pagezone = page_zone(page);
307902aaed0SHisashi Hifumi 
308902aaed0SHisashi Hifumi 		if (pagezone != zone) {
309902aaed0SHisashi Hifumi 			if (zone)
3103dd7ae8eSShaohua Li 				spin_unlock_irqrestore(&zone->lru_lock, flags);
311902aaed0SHisashi Hifumi 			zone = pagezone;
3123dd7ae8eSShaohua Li 			spin_lock_irqsave(&zone->lru_lock, flags);
313902aaed0SHisashi Hifumi 		}
3143dd7ae8eSShaohua Li 
315fa9add64SHugh Dickins 		lruvec = mem_cgroup_page_lruvec(page, zone);
316fa9add64SHugh Dickins 		(*move_fn)(page, lruvec, arg);
3173dd7ae8eSShaohua Li 	}
3183dd7ae8eSShaohua Li 	if (zone)
3193dd7ae8eSShaohua Li 		spin_unlock_irqrestore(&zone->lru_lock, flags);
3203dd7ae8eSShaohua Li 	release_pages(pvec->pages, pvec->nr, pvec->cold);
3213dd7ae8eSShaohua Li 	pagevec_reinit(pvec);
3223dd7ae8eSShaohua Li }
3233dd7ae8eSShaohua Li 
324fa9add64SHugh Dickins static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
325fa9add64SHugh Dickins 				 void *arg)
3263dd7ae8eSShaohua Li {
3273dd7ae8eSShaohua Li 	int *pgmoved = arg;
3283dd7ae8eSShaohua Li 
329894bc310SLee Schermerhorn 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
3303f58a829SMinchan Kim 		enum lru_list lru = page_lru_base_type(page);
331925b7673SJohannes Weiner 		list_move_tail(&page->lru, &lruvec->lists[lru]);
3323dd7ae8eSShaohua Li 		(*pgmoved)++;
333902aaed0SHisashi Hifumi 	}
334902aaed0SHisashi Hifumi }
3353dd7ae8eSShaohua Li 
3363dd7ae8eSShaohua Li /*
3373dd7ae8eSShaohua Li  * pagevec_move_tail() must be called with IRQ disabled.
3383dd7ae8eSShaohua Li  * Otherwise this may cause nasty races.
3393dd7ae8eSShaohua Li  */
3403dd7ae8eSShaohua Li static void pagevec_move_tail(struct pagevec *pvec)
3413dd7ae8eSShaohua Li {
3423dd7ae8eSShaohua Li 	int pgmoved = 0;
3433dd7ae8eSShaohua Li 
3443dd7ae8eSShaohua Li 	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
345902aaed0SHisashi Hifumi 	__count_vm_events(PGROTATED, pgmoved);
346902aaed0SHisashi Hifumi }
347902aaed0SHisashi Hifumi 
348902aaed0SHisashi Hifumi /*
3491da177e4SLinus Torvalds  * Writeback is about to end against a page which has been marked for immediate
3501da177e4SLinus Torvalds  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
351902aaed0SHisashi Hifumi  * inactive list.
3521da177e4SLinus Torvalds  */
353ac6aadb2SMiklos Szeredi void rotate_reclaimable_page(struct page *page)
3541da177e4SLinus Torvalds {
355ac6aadb2SMiklos Szeredi 	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
356894bc310SLee Schermerhorn 	    !PageUnevictable(page) && PageLRU(page)) {
357902aaed0SHisashi Hifumi 		struct pagevec *pvec;
3581da177e4SLinus Torvalds 		unsigned long flags;
3591da177e4SLinus Torvalds 
360902aaed0SHisashi Hifumi 		page_cache_get(page);
361902aaed0SHisashi Hifumi 		local_irq_save(flags);
362902aaed0SHisashi Hifumi 		pvec = &__get_cpu_var(lru_rotate_pvecs);
363902aaed0SHisashi Hifumi 		if (!pagevec_add(pvec, page))
364902aaed0SHisashi Hifumi 			pagevec_move_tail(pvec);
365902aaed0SHisashi Hifumi 		local_irq_restore(flags);
366ac6aadb2SMiklos Szeredi 	}
3671da177e4SLinus Torvalds }
3681da177e4SLinus Torvalds 
369fa9add64SHugh Dickins static void update_page_reclaim_stat(struct lruvec *lruvec,
3703e2f41f1SKOSAKI Motohiro 				     int file, int rotated)
3713e2f41f1SKOSAKI Motohiro {
372fa9add64SHugh Dickins 	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
3733e2f41f1SKOSAKI Motohiro 
3743e2f41f1SKOSAKI Motohiro 	reclaim_stat->recent_scanned[file]++;
3753e2f41f1SKOSAKI Motohiro 	if (rotated)
3763e2f41f1SKOSAKI Motohiro 		reclaim_stat->recent_rotated[file]++;
3773e2f41f1SKOSAKI Motohiro }
3783e2f41f1SKOSAKI Motohiro 
379fa9add64SHugh Dickins static void __activate_page(struct page *page, struct lruvec *lruvec,
380fa9add64SHugh Dickins 			    void *arg)
381744ed144SShaohua Li {
3827a608572SLinus Torvalds 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
383744ed144SShaohua Li 		int file = page_is_file_cache(page);
384744ed144SShaohua Li 		int lru = page_lru_base_type(page);
385744ed144SShaohua Li 
386fa9add64SHugh Dickins 		del_page_from_lru_list(page, lruvec, lru);
387744ed144SShaohua Li 		SetPageActive(page);
388744ed144SShaohua Li 		lru += LRU_ACTIVE;
389fa9add64SHugh Dickins 		add_page_to_lru_list(page, lruvec, lru);
390c6286c98SMel Gorman 		trace_mm_lru_activate(page, page_to_pfn(page));
3917a608572SLinus Torvalds 
392fa9add64SHugh Dickins 		__count_vm_event(PGACTIVATE);
393fa9add64SHugh Dickins 		update_page_reclaim_stat(lruvec, file, 1);
394744ed144SShaohua Li 	}
395eb709b0dSShaohua Li }
396eb709b0dSShaohua Li 
397eb709b0dSShaohua Li #ifdef CONFIG_SMP
398eb709b0dSShaohua Li static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
399eb709b0dSShaohua Li 
400eb709b0dSShaohua Li static void activate_page_drain(int cpu)
401eb709b0dSShaohua Li {
402eb709b0dSShaohua Li 	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
403eb709b0dSShaohua Li 
404eb709b0dSShaohua Li 	if (pagevec_count(pvec))
405eb709b0dSShaohua Li 		pagevec_lru_move_fn(pvec, __activate_page, NULL);
406eb709b0dSShaohua Li }
407eb709b0dSShaohua Li 
408eb709b0dSShaohua Li void activate_page(struct page *page)
409eb709b0dSShaohua Li {
410eb709b0dSShaohua Li 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
411eb709b0dSShaohua Li 		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
412eb709b0dSShaohua Li 
413eb709b0dSShaohua Li 		page_cache_get(page);
414eb709b0dSShaohua Li 		if (!pagevec_add(pvec, page))
415eb709b0dSShaohua Li 			pagevec_lru_move_fn(pvec, __activate_page, NULL);
416eb709b0dSShaohua Li 		put_cpu_var(activate_page_pvecs);
417eb709b0dSShaohua Li 	}
418eb709b0dSShaohua Li }
419eb709b0dSShaohua Li 
420eb709b0dSShaohua Li #else
421eb709b0dSShaohua Li static inline void activate_page_drain(int cpu)
422eb709b0dSShaohua Li {
423eb709b0dSShaohua Li }
424eb709b0dSShaohua Li 
425eb709b0dSShaohua Li void activate_page(struct page *page)
426eb709b0dSShaohua Li {
427eb709b0dSShaohua Li 	struct zone *zone = page_zone(page);
428eb709b0dSShaohua Li 
429eb709b0dSShaohua Li 	spin_lock_irq(&zone->lru_lock);
430fa9add64SHugh Dickins 	__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
4311da177e4SLinus Torvalds 	spin_unlock_irq(&zone->lru_lock);
4321da177e4SLinus Torvalds }
433eb709b0dSShaohua Li #endif
4341da177e4SLinus Torvalds 
435*059285a2SMel Gorman static void __lru_cache_activate_page(struct page *page)
436*059285a2SMel Gorman {
437*059285a2SMel Gorman 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
438*059285a2SMel Gorman 	int i;
439*059285a2SMel Gorman 
440*059285a2SMel Gorman 	/*
441*059285a2SMel Gorman 	 * Search backwards on the optimistic assumption that the page being
442*059285a2SMel Gorman 	 * activated has just been added to this pagevec. Note that only
443*059285a2SMel Gorman 	 * the local pagevec is examined as a !PageLRU page could be in the
444*059285a2SMel Gorman 	 * process of being released, reclaimed, migrated or on a remote
445*059285a2SMel Gorman 	 * pagevec that is currently being drained. Furthermore, marking
446*059285a2SMel Gorman 	 * a remote pagevec's page PageActive potentially hits a race where
447*059285a2SMel Gorman 	 * a page is marked PageActive just after it is added to the inactive
448*059285a2SMel Gorman 	 * list causing accounting errors and BUG_ON checks to trigger.
449*059285a2SMel Gorman 	 */
450*059285a2SMel Gorman 	for (i = pagevec_count(pvec) - 1; i >= 0; i--) {
451*059285a2SMel Gorman 		struct page *pagevec_page = pvec->pages[i];
452*059285a2SMel Gorman 
453*059285a2SMel Gorman 		if (pagevec_page == page) {
454*059285a2SMel Gorman 			SetPageActive(page);
455*059285a2SMel Gorman 			break;
456*059285a2SMel Gorman 		}
457*059285a2SMel Gorman 	}
458*059285a2SMel Gorman 
459*059285a2SMel Gorman 	put_cpu_var(lru_add_pvec);
460*059285a2SMel Gorman }
461*059285a2SMel Gorman 
4621da177e4SLinus Torvalds /*
4631da177e4SLinus Torvalds  * Mark a page as having seen activity.
4641da177e4SLinus Torvalds  *
4651da177e4SLinus Torvalds  * inactive,unreferenced	->	inactive,referenced
4661da177e4SLinus Torvalds  * inactive,referenced		->	active,unreferenced
4671da177e4SLinus Torvalds  * active,unreferenced		->	active,referenced
4681da177e4SLinus Torvalds  */
469920c7a5dSHarvey Harrison void mark_page_accessed(struct page *page)
4701da177e4SLinus Torvalds {
471894bc310SLee Schermerhorn 	if (!PageActive(page) && !PageUnevictable(page) &&
472*059285a2SMel Gorman 			PageReferenced(page)) {
473*059285a2SMel Gorman 
474*059285a2SMel Gorman 		/*
475*059285a2SMel Gorman 		 * If the page is on the LRU, queue it for activation via
476*059285a2SMel Gorman 		 * activate_page_pvecs. Otherwise, assume the page is on a
477*059285a2SMel Gorman 		 * pagevec, mark it active and it'll be moved to the active
478*059285a2SMel Gorman 		 * LRU on the next drain.
479*059285a2SMel Gorman 		 */
480*059285a2SMel Gorman 		if (PageLRU(page))
4811da177e4SLinus Torvalds 			activate_page(page);
482*059285a2SMel Gorman 		else
483*059285a2SMel Gorman 			__lru_cache_activate_page(page);
4841da177e4SLinus Torvalds 		ClearPageReferenced(page);
4851da177e4SLinus Torvalds 	} else if (!PageReferenced(page)) {
4861da177e4SLinus Torvalds 		SetPageReferenced(page);
4871da177e4SLinus Torvalds 	}
4881da177e4SLinus Torvalds }
4891da177e4SLinus Torvalds EXPORT_SYMBOL(mark_page_accessed);
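
/*
 * Illustrative sketch, not part of the original file: two consecutive calls
 * walk a cold page through the transitions documented above.  The helper
 * name is hypothetical.
 */
static inline void example_touch_page_twice(struct page *page)
{
	mark_page_accessed(page);	/* inactive,unreferenced -> inactive,referenced */
	mark_page_accessed(page);	/* inactive,referenced -> active,unreferenced */
}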
4901da177e4SLinus Torvalds 
491d741c9cdSRobin Dong /*
49213f7f789SMel Gorman  * Queue the page for addition to the LRU via pagevec. The decision on whether
49313f7f789SMel Gorman  * to add the page to the [in]active [file|anon] list is deferred until the
49413f7f789SMel Gorman  * pagevec is drained. This gives the caller of __lru_cache_add() a chance to
49513f7f789SMel Gorman  * have the page added to the active list using mark_page_accessed().
496d741c9cdSRobin Dong  */
497f04e9ebbSKOSAKI Motohiro void __lru_cache_add(struct page *page, enum lru_list lru)
4981da177e4SLinus Torvalds {
49913f7f789SMel Gorman 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
50013f7f789SMel Gorman 
50113f7f789SMel Gorman 	if (is_active_lru(lru))
50213f7f789SMel Gorman 		SetPageActive(page);
50313f7f789SMel Gorman 	else
50413f7f789SMel Gorman 		ClearPageActive(page);
5051da177e4SLinus Torvalds 
5061da177e4SLinus Torvalds 	page_cache_get(page);
507d741c9cdSRobin Dong 	if (!pagevec_space(pvec))
5085095ae83SHugh Dickins 		__pagevec_lru_add(pvec, lru);
509d741c9cdSRobin Dong 	pagevec_add(pvec, page);
51013f7f789SMel Gorman 	put_cpu_var(lru_add_pvec);
5111da177e4SLinus Torvalds }
51247846b06SMiklos Szeredi EXPORT_SYMBOL(__lru_cache_add);
5131da177e4SLinus Torvalds 
514f04e9ebbSKOSAKI Motohiro /**
515f04e9ebbSKOSAKI Motohiro  * lru_cache_add_lru - add a page to a page list
516f04e9ebbSKOSAKI Motohiro  * @page: the page to be added to the LRU.
517f04e9ebbSKOSAKI Motohiro  * @lru: the LRU list to which the page is added.
518f04e9ebbSKOSAKI Motohiro  */
519f04e9ebbSKOSAKI Motohiro void lru_cache_add_lru(struct page *page, enum lru_list lru)
5201da177e4SLinus Torvalds {
521f04e9ebbSKOSAKI Motohiro 	if (PageActive(page)) {
522894bc310SLee Schermerhorn 		VM_BUG_ON(PageUnevictable(page));
523894bc310SLee Schermerhorn 	} else if (PageUnevictable(page)) {
524894bc310SLee Schermerhorn 		VM_BUG_ON(PageActive(page));
525f04e9ebbSKOSAKI Motohiro 	}
5261da177e4SLinus Torvalds 
52713f7f789SMel Gorman 	VM_BUG_ON(PageLRU(page));
528f04e9ebbSKOSAKI Motohiro 	__lru_cache_add(page, lru);
5291da177e4SLinus Torvalds }
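
/*
 * Illustrative sketch, not part of the original file: how a freshly allocated
 * page typically reaches the LRU.  The caller holds a reference and the page
 * is not yet PageLRU; the helper name is hypothetical.
 */
static inline void example_add_new_page_to_lru(struct page *page)
{
	/* queued on the per-cpu pagevec, added to the LRU on the next drain */
	lru_cache_add_lru(page, LRU_INACTIVE_FILE);
}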
5301da177e4SLinus Torvalds 
531894bc310SLee Schermerhorn /**
532894bc310SLee Schermerhorn  * add_page_to_unevictable_list - add a page to the unevictable list
533894bc310SLee Schermerhorn  * @page:  the page to be added to the unevictable list
534894bc310SLee Schermerhorn  *
535894bc310SLee Schermerhorn  * Add page directly to its zone's unevictable list.  To avoid races with
536894bc310SLee Schermerhorn  * tasks that might be making the page evictable, through eg. munlock,
537894bc310SLee Schermerhorn  * tasks that might be making the page evictable, through e.g. munlock,
538894bc310SLee Schermerhorn  * while it's locked or otherwise "invisible" to other tasks.  This is
539894bc310SLee Schermerhorn  * difficult to do when using the pagevec cache, so bypass that.
540894bc310SLee Schermerhorn  */
541894bc310SLee Schermerhorn void add_page_to_unevictable_list(struct page *page)
542894bc310SLee Schermerhorn {
543894bc310SLee Schermerhorn 	struct zone *zone = page_zone(page);
544fa9add64SHugh Dickins 	struct lruvec *lruvec;
545894bc310SLee Schermerhorn 
546894bc310SLee Schermerhorn 	spin_lock_irq(&zone->lru_lock);
547fa9add64SHugh Dickins 	lruvec = mem_cgroup_page_lruvec(page, zone);
548894bc310SLee Schermerhorn 	SetPageUnevictable(page);
549894bc310SLee Schermerhorn 	SetPageLRU(page);
550fa9add64SHugh Dickins 	add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
551894bc310SLee Schermerhorn 	spin_unlock_irq(&zone->lru_lock);
552894bc310SLee Schermerhorn }
553894bc310SLee Schermerhorn 
554902aaed0SHisashi Hifumi /*
55531560180SMinchan Kim  * If the page can not be invalidated, it is moved to the
55631560180SMinchan Kim  * inactive list to speed up its reclaim.  It is moved to the
55731560180SMinchan Kim  * head of the list, rather than the tail, to give the flusher
55831560180SMinchan Kim  * threads some time to write it out, as this is much more
55931560180SMinchan Kim  * effective than the single-page writeout from reclaim.
560278df9f4SMinchan Kim  *
561278df9f4SMinchan Kim  * If the page isn't page_mapped and is dirty/writeback, the page
562278df9f4SMinchan Kim  * can be reclaimed asap using PG_reclaim.
563278df9f4SMinchan Kim  *
564278df9f4SMinchan Kim  * 1. active, mapped page -> none
565278df9f4SMinchan Kim  * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
566278df9f4SMinchan Kim  * 3. inactive, mapped page -> none
567278df9f4SMinchan Kim  * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
568278df9f4SMinchan Kim  * 5. inactive, clean -> inactive, tail
569278df9f4SMinchan Kim  * 6. Others -> none
570278df9f4SMinchan Kim  *
571278df9f4SMinchan Kim  * In case 4, the page is moved to the head of the inactive list because
572278df9f4SMinchan Kim  * the VM expects the flusher threads to write it out, which is much more
573278df9f4SMinchan Kim  * effective than the single-page writeout from reclaim.
57431560180SMinchan Kim  */
575fa9add64SHugh Dickins static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
576fa9add64SHugh Dickins 			      void *arg)
57731560180SMinchan Kim {
57831560180SMinchan Kim 	int lru, file;
579278df9f4SMinchan Kim 	bool active;
58031560180SMinchan Kim 
581278df9f4SMinchan Kim 	if (!PageLRU(page))
58231560180SMinchan Kim 		return;
58331560180SMinchan Kim 
584bad49d9cSMinchan Kim 	if (PageUnevictable(page))
585bad49d9cSMinchan Kim 		return;
586bad49d9cSMinchan Kim 
58731560180SMinchan Kim 	/* Some processes are using the page */
58831560180SMinchan Kim 	if (page_mapped(page))
58931560180SMinchan Kim 		return;
59031560180SMinchan Kim 
591278df9f4SMinchan Kim 	active = PageActive(page);
59231560180SMinchan Kim 	file = page_is_file_cache(page);
59331560180SMinchan Kim 	lru = page_lru_base_type(page);
594fa9add64SHugh Dickins 
595fa9add64SHugh Dickins 	del_page_from_lru_list(page, lruvec, lru + active);
59631560180SMinchan Kim 	ClearPageActive(page);
59731560180SMinchan Kim 	ClearPageReferenced(page);
598fa9add64SHugh Dickins 	add_page_to_lru_list(page, lruvec, lru);
59931560180SMinchan Kim 
600278df9f4SMinchan Kim 	if (PageWriteback(page) || PageDirty(page)) {
601278df9f4SMinchan Kim 		/*
602278df9f4SMinchan Kim 		 * PG_reclaim can race with end_page_writeback(),
603278df9f4SMinchan Kim 		 * which can confuse readahead.  But the race window
604278df9f4SMinchan Kim 		 * is _really_ small and it's a non-critical problem.
605278df9f4SMinchan Kim 		 */
606278df9f4SMinchan Kim 		SetPageReclaim(page);
607278df9f4SMinchan Kim 	} else {
608278df9f4SMinchan Kim 		/*
609278df9f4SMinchan Kim 		 * The page's writeback ended while the page was on the pagevec,
610278df9f4SMinchan Kim 		 * so move the page to the tail of the inactive list.
611278df9f4SMinchan Kim 		 */
612925b7673SJohannes Weiner 		list_move_tail(&page->lru, &lruvec->lists[lru]);
613278df9f4SMinchan Kim 		__count_vm_event(PGROTATED);
614278df9f4SMinchan Kim 	}
615278df9f4SMinchan Kim 
616278df9f4SMinchan Kim 	if (active)
617278df9f4SMinchan Kim 		__count_vm_event(PGDEACTIVATE);
618fa9add64SHugh Dickins 	update_page_reclaim_stat(lruvec, file, 0);
61931560180SMinchan Kim }
62031560180SMinchan Kim 
62131560180SMinchan Kim /*
622902aaed0SHisashi Hifumi  * Drain pages out of the cpu's pagevecs.
623902aaed0SHisashi Hifumi  * Either "cpu" is the current CPU, and preemption has already been
624902aaed0SHisashi Hifumi  * disabled; or "cpu" is being hot-unplugged, and is already dead.
625902aaed0SHisashi Hifumi  */
626f0cb3c76SKonstantin Khlebnikov void lru_add_drain_cpu(int cpu)
6271da177e4SLinus Torvalds {
62813f7f789SMel Gorman 	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
6291da177e4SLinus Torvalds 
6301da177e4SLinus Torvalds 	if (pagevec_count(pvec))
63113f7f789SMel Gorman 		__pagevec_lru_add(pvec, NR_LRU_LISTS);
632902aaed0SHisashi Hifumi 
633902aaed0SHisashi Hifumi 	pvec = &per_cpu(lru_rotate_pvecs, cpu);
634902aaed0SHisashi Hifumi 	if (pagevec_count(pvec)) {
635902aaed0SHisashi Hifumi 		unsigned long flags;
636902aaed0SHisashi Hifumi 
637902aaed0SHisashi Hifumi 		/* No harm done if a racing interrupt already did this */
638902aaed0SHisashi Hifumi 		local_irq_save(flags);
639902aaed0SHisashi Hifumi 		pagevec_move_tail(pvec);
640902aaed0SHisashi Hifumi 		local_irq_restore(flags);
641902aaed0SHisashi Hifumi 	}
64231560180SMinchan Kim 
64331560180SMinchan Kim 	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
64431560180SMinchan Kim 	if (pagevec_count(pvec))
6453dd7ae8eSShaohua Li 		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
646eb709b0dSShaohua Li 
647eb709b0dSShaohua Li 	activate_page_drain(cpu);
64831560180SMinchan Kim }
64931560180SMinchan Kim 
65031560180SMinchan Kim /**
65131560180SMinchan Kim  * deactivate_page - forcefully deactivate a page
65231560180SMinchan Kim  * @page: page to deactivate
65331560180SMinchan Kim  *
65431560180SMinchan Kim  * This function hints to the VM that @page is a good reclaim candidate,
65531560180SMinchan Kim  * for example if its invalidation fails due to the page being dirty
65631560180SMinchan Kim  * or under writeback.
65731560180SMinchan Kim  */
65831560180SMinchan Kim void deactivate_page(struct page *page)
65931560180SMinchan Kim {
660821ed6bbSMinchan Kim 	/*
661821ed6bbSMinchan Kim 	 * In a workload with many unevictable pages (such as one using mprotect),
662821ed6bbSMinchan Kim 	 * deactivating unevictable pages to accelerate reclaim is pointless.
663821ed6bbSMinchan Kim 	 */
664821ed6bbSMinchan Kim 	if (PageUnevictable(page))
665821ed6bbSMinchan Kim 		return;
666821ed6bbSMinchan Kim 
66731560180SMinchan Kim 	if (likely(get_page_unless_zero(page))) {
66831560180SMinchan Kim 		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
66931560180SMinchan Kim 
67031560180SMinchan Kim 		if (!pagevec_add(pvec, page))
6713dd7ae8eSShaohua Li 			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
67231560180SMinchan Kim 		put_cpu_var(lru_deactivate_pvecs);
67331560180SMinchan Kim 	}
67480bfed90SAndrew Morton }
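
/*
 * Illustrative sketch, not part of the original file: a cache-invalidation
 * style caller hinting about a page it failed to drop because the page was
 * dirty or under writeback.  The helper name is hypothetical.
 */
static inline void example_hint_failed_invalidation(struct page *page)
{
	if (PageDirty(page) || PageWriteback(page))
		deactivate_page(page);	/* make it a preferred reclaim candidate */
}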
67580bfed90SAndrew Morton 
67680bfed90SAndrew Morton void lru_add_drain(void)
67780bfed90SAndrew Morton {
678f0cb3c76SKonstantin Khlebnikov 	lru_add_drain_cpu(get_cpu());
67980bfed90SAndrew Morton 	put_cpu();
6801da177e4SLinus Torvalds }
6811da177e4SLinus Torvalds 
682c4028958SDavid Howells static void lru_add_drain_per_cpu(struct work_struct *dummy)
683053837fcSNick Piggin {
684053837fcSNick Piggin 	lru_add_drain();
685053837fcSNick Piggin }
686053837fcSNick Piggin 
687053837fcSNick Piggin /*
688053837fcSNick Piggin  * Returns 0 for success
689053837fcSNick Piggin  */
690053837fcSNick Piggin int lru_add_drain_all(void)
691053837fcSNick Piggin {
692c4028958SDavid Howells 	return schedule_on_each_cpu(lru_add_drain_per_cpu);
693053837fcSNick Piggin }
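
/*
 * Illustrative sketch, not part of the original file: callers about to scan
 * or isolate LRU pages first flush every CPU's pagevecs so that recently
 * added pages are actually visible on the LRU lists.  The helper name is
 * hypothetical.
 */
static inline int example_flush_all_pagevecs(void)
{
	return lru_add_drain_all();	/* 0 on success */
}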
694053837fcSNick Piggin 
6951da177e4SLinus Torvalds /*
6961da177e4SLinus Torvalds  * Batched page_cache_release().  Decrement the reference count on all the
6971da177e4SLinus Torvalds  * passed pages.  If it fell to zero then remove the page from the LRU and
6981da177e4SLinus Torvalds  * free it.
6991da177e4SLinus Torvalds  *
7001da177e4SLinus Torvalds  * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
7011da177e4SLinus Torvalds  * for the remainder of the operation.
7021da177e4SLinus Torvalds  *
703ab33dc09SFernando Luis Vazquez Cao  * The locking in this function is against shrink_inactive_list(): we recheck
704ab33dc09SFernando Luis Vazquez Cao  * the page count inside the lock to see whether shrink_inactive_list()
705ab33dc09SFernando Luis Vazquez Cao  * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
706ab33dc09SFernando Luis Vazquez Cao  * will free it.
7071da177e4SLinus Torvalds  */
7081da177e4SLinus Torvalds void release_pages(struct page **pages, int nr, int cold)
7091da177e4SLinus Torvalds {
7101da177e4SLinus Torvalds 	int i;
711cc59850eSKonstantin Khlebnikov 	LIST_HEAD(pages_to_free);
7121da177e4SLinus Torvalds 	struct zone *zone = NULL;
713fa9add64SHugh Dickins 	struct lruvec *lruvec;
714902aaed0SHisashi Hifumi 	unsigned long uninitialized_var(flags);
7151da177e4SLinus Torvalds 
7161da177e4SLinus Torvalds 	for (i = 0; i < nr; i++) {
7171da177e4SLinus Torvalds 		struct page *page = pages[i];
7181da177e4SLinus Torvalds 
7198519fb30SNick Piggin 		if (unlikely(PageCompound(page))) {
7208519fb30SNick Piggin 			if (zone) {
721902aaed0SHisashi Hifumi 				spin_unlock_irqrestore(&zone->lru_lock, flags);
7228519fb30SNick Piggin 				zone = NULL;
7238519fb30SNick Piggin 			}
7248519fb30SNick Piggin 			put_compound_page(page);
7258519fb30SNick Piggin 			continue;
7268519fb30SNick Piggin 		}
7278519fb30SNick Piggin 
728b5810039SNick Piggin 		if (!put_page_testzero(page))
7291da177e4SLinus Torvalds 			continue;
7301da177e4SLinus Torvalds 
73146453a6eSNick Piggin 		if (PageLRU(page)) {
73246453a6eSNick Piggin 			struct zone *pagezone = page_zone(page);
733894bc310SLee Schermerhorn 
7341da177e4SLinus Torvalds 			if (pagezone != zone) {
7351da177e4SLinus Torvalds 				if (zone)
736902aaed0SHisashi Hifumi 					spin_unlock_irqrestore(&zone->lru_lock,
737902aaed0SHisashi Hifumi 									flags);
7381da177e4SLinus Torvalds 				zone = pagezone;
739902aaed0SHisashi Hifumi 				spin_lock_irqsave(&zone->lru_lock, flags);
7401da177e4SLinus Torvalds 			}
741fa9add64SHugh Dickins 
742fa9add64SHugh Dickins 			lruvec = mem_cgroup_page_lruvec(page, zone);
743725d704eSNick Piggin 			VM_BUG_ON(!PageLRU(page));
74467453911SNick Piggin 			__ClearPageLRU(page);
745fa9add64SHugh Dickins 			del_page_from_lru_list(page, lruvec, page_off_lru(page));
74646453a6eSNick Piggin 		}
74746453a6eSNick Piggin 
748cc59850eSKonstantin Khlebnikov 		list_add(&page->lru, &pages_to_free);
7491da177e4SLinus Torvalds 	}
7501da177e4SLinus Torvalds 	if (zone)
751902aaed0SHisashi Hifumi 		spin_unlock_irqrestore(&zone->lru_lock, flags);
7521da177e4SLinus Torvalds 
753cc59850eSKonstantin Khlebnikov 	free_hot_cold_page_list(&pages_to_free, cold);
7541da177e4SLinus Torvalds }
7550be8557bSMiklos Szeredi EXPORT_SYMBOL(release_pages);
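
/*
 * Illustrative sketch, not part of the original file: dropping the references
 * held on a batch of pages in one call rather than looping over put_page().
 * The helper name is hypothetical.
 */
static inline void example_release_page_batch(struct page **pages, int nr)
{
	release_pages(pages, nr, 0);	/* cold == 0: the pages are likely cache-warm */
}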
7561da177e4SLinus Torvalds 
7571da177e4SLinus Torvalds /*
7581da177e4SLinus Torvalds  * The pages which we're about to release may be in the deferred lru-addition
7591da177e4SLinus Torvalds  * queues.  That would prevent them from really being freed right now.  That's
7601da177e4SLinus Torvalds  * OK from a correctness point of view but is inefficient - those pages may be
7611da177e4SLinus Torvalds  * cache-warm and we want to give them back to the page allocator ASAP.
7621da177e4SLinus Torvalds  *
7631da177e4SLinus Torvalds  * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
7641da177e4SLinus Torvalds  * calls release_pages() directly to avoid mutual recursion.
7661da177e4SLinus Torvalds  */
7671da177e4SLinus Torvalds void __pagevec_release(struct pagevec *pvec)
7681da177e4SLinus Torvalds {
7691da177e4SLinus Torvalds 	lru_add_drain();
7701da177e4SLinus Torvalds 	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
7711da177e4SLinus Torvalds 	pagevec_reinit(pvec);
7721da177e4SLinus Torvalds }
7737f285701SSteve French EXPORT_SYMBOL(__pagevec_release);
7747f285701SSteve French 
77512d27107SHugh Dickins #ifdef CONFIG_TRANSPARENT_HUGEPAGE
77671e3aac0SAndrea Arcangeli /* used by __split_huge_page_refcount() */
777fa9add64SHugh Dickins void lru_add_page_tail(struct page *page, struct page *page_tail,
7785bc7b8acSShaohua Li 		       struct lruvec *lruvec, struct list_head *list)
77971e3aac0SAndrea Arcangeli {
7807512102cSHugh Dickins 	int uninitialized_var(active);
78171e3aac0SAndrea Arcangeli 	enum lru_list lru;
78271e3aac0SAndrea Arcangeli 	const int file = 0;
78371e3aac0SAndrea Arcangeli 
78471e3aac0SAndrea Arcangeli 	VM_BUG_ON(!PageHead(page));
78571e3aac0SAndrea Arcangeli 	VM_BUG_ON(PageCompound(page_tail));
78671e3aac0SAndrea Arcangeli 	VM_BUG_ON(PageLRU(page_tail));
787fa9add64SHugh Dickins 	VM_BUG_ON(NR_CPUS != 1 &&
788fa9add64SHugh Dickins 		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
78971e3aac0SAndrea Arcangeli 
7905bc7b8acSShaohua Li 	if (!list)
79171e3aac0SAndrea Arcangeli 		SetPageLRU(page_tail);
79271e3aac0SAndrea Arcangeli 
79339b5f29aSHugh Dickins 	if (page_evictable(page_tail)) {
79471e3aac0SAndrea Arcangeli 		if (PageActive(page)) {
79571e3aac0SAndrea Arcangeli 			SetPageActive(page_tail);
79671e3aac0SAndrea Arcangeli 			active = 1;
79771e3aac0SAndrea Arcangeli 			lru = LRU_ACTIVE_ANON;
79871e3aac0SAndrea Arcangeli 		} else {
79971e3aac0SAndrea Arcangeli 			active = 0;
80071e3aac0SAndrea Arcangeli 			lru = LRU_INACTIVE_ANON;
80171e3aac0SAndrea Arcangeli 		}
80271e3aac0SAndrea Arcangeli 	} else {
80371e3aac0SAndrea Arcangeli 		SetPageUnevictable(page_tail);
80412d27107SHugh Dickins 		lru = LRU_UNEVICTABLE;
80512d27107SHugh Dickins 	}
80612d27107SHugh Dickins 
80712d27107SHugh Dickins 	if (likely(PageLRU(page)))
80812d27107SHugh Dickins 		list_add_tail(&page_tail->lru, &page->lru);
8095bc7b8acSShaohua Li 	else if (list) {
8105bc7b8acSShaohua Li 		/* page reclaim is reclaiming a huge page */
8115bc7b8acSShaohua Li 		get_page(page_tail);
8125bc7b8acSShaohua Li 		list_add_tail(&page_tail->lru, list);
8135bc7b8acSShaohua Li 	} else {
81412d27107SHugh Dickins 		struct list_head *list_head;
81512d27107SHugh Dickins 		/*
81612d27107SHugh Dickins 		 * Head page has not yet been counted, as an hpage,
81712d27107SHugh Dickins 		 * so we must account for each subpage individually.
81812d27107SHugh Dickins 		 *
81912d27107SHugh Dickins 		 * Use the standard add function to put page_tail on the list,
82012d27107SHugh Dickins 		 * but then correct its position so they all end up in order.
82112d27107SHugh Dickins 		 */
822fa9add64SHugh Dickins 		add_page_to_lru_list(page_tail, lruvec, lru);
82312d27107SHugh Dickins 		list_head = page_tail->lru.prev;
82412d27107SHugh Dickins 		list_move_tail(&page_tail->lru, list_head);
82571e3aac0SAndrea Arcangeli 	}
8267512102cSHugh Dickins 
8277512102cSHugh Dickins 	if (!PageUnevictable(page))
828fa9add64SHugh Dickins 		update_page_reclaim_stat(lruvec, file, active);
82971e3aac0SAndrea Arcangeli }
83012d27107SHugh Dickins #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
83171e3aac0SAndrea Arcangeli 
832fa9add64SHugh Dickins static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
833fa9add64SHugh Dickins 				 void *arg)
8343dd7ae8eSShaohua Li {
83513f7f789SMel Gorman 	enum lru_list requested_lru = (enum lru_list)arg;
83613f7f789SMel Gorman 	int file = page_is_file_cache(page);
83713f7f789SMel Gorman 	int active = PageActive(page);
83813f7f789SMel Gorman 	enum lru_list lru = page_lru(page);
8393dd7ae8eSShaohua Li 
84013f7f789SMel Gorman 	WARN_ON_ONCE(requested_lru < NR_LRU_LISTS && requested_lru != lru);
8413dd7ae8eSShaohua Li 	VM_BUG_ON(PageUnevictable(page));
8423dd7ae8eSShaohua Li 	VM_BUG_ON(PageLRU(page));
8433dd7ae8eSShaohua Li 
8443dd7ae8eSShaohua Li 	SetPageLRU(page);
845fa9add64SHugh Dickins 	add_page_to_lru_list(page, lruvec, lru);
846fa9add64SHugh Dickins 	update_page_reclaim_stat(lruvec, file, active);
847c6286c98SMel Gorman 	trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
8483dd7ae8eSShaohua Li }
8493dd7ae8eSShaohua Li 
8501da177e4SLinus Torvalds /*
8511da177e4SLinus Torvalds  * Add the passed pages to the LRU, then drop the caller's refcount
8521da177e4SLinus Torvalds  * on them.  Reinitialises the caller's pagevec.
8531da177e4SLinus Torvalds  */
8545095ae83SHugh Dickins void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
8551da177e4SLinus Torvalds {
856894bc310SLee Schermerhorn 	VM_BUG_ON(is_unevictable_lru(lru));
8571da177e4SLinus Torvalds 
8585095ae83SHugh Dickins 	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, (void *)lru);
8591da177e4SLinus Torvalds }
8605095ae83SHugh Dickins EXPORT_SYMBOL(__pagevec_lru_add);
861f04e9ebbSKOSAKI Motohiro 
8621da177e4SLinus Torvalds /**
8631da177e4SLinus Torvalds  * pagevec_lookup - gang pagecache lookup
8641da177e4SLinus Torvalds  * @pvec:	Where the resulting pages are placed
8651da177e4SLinus Torvalds  * @mapping:	The address_space to search
8661da177e4SLinus Torvalds  * @start:	The starting page index
8671da177e4SLinus Torvalds  * @nr_pages:	The maximum number of pages
8681da177e4SLinus Torvalds  *
8691da177e4SLinus Torvalds  * pagevec_lookup() will search for and return a group of up to @nr_pages pages
8701da177e4SLinus Torvalds  * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
8711da177e4SLinus Torvalds  * reference against the pages in @pvec.
8721da177e4SLinus Torvalds  *
8731da177e4SLinus Torvalds  * The search returns a group of mapping-contiguous pages with ascending
8741da177e4SLinus Torvalds  * indexes.  There may be holes in the indices due to not-present pages.
8751da177e4SLinus Torvalds  *
8761da177e4SLinus Torvalds  * pagevec_lookup() returns the number of pages which were found.
8771da177e4SLinus Torvalds  */
8781da177e4SLinus Torvalds unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
8791da177e4SLinus Torvalds 		pgoff_t start, unsigned nr_pages)
8801da177e4SLinus Torvalds {
8811da177e4SLinus Torvalds 	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
8821da177e4SLinus Torvalds 	return pagevec_count(pvec);
8831da177e4SLinus Torvalds }
88478539fdfSChristoph Hellwig EXPORT_SYMBOL(pagevec_lookup);
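
/*
 * Illustrative sketch, not part of the original file: the usual loop for
 * walking every page currently present in a mapping with pagevec_lookup().
 * The helper name is hypothetical; real callers lock pages as required.
 */
static inline void example_walk_mapping(struct address_space *mapping)
{
	struct pagevec pvec;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);
	while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
		int i;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			index = page->index + 1;	/* resume after this page */
			/* ... inspect or process the page here ... */
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}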
88578539fdfSChristoph Hellwig 
8861da177e4SLinus Torvalds unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
8871da177e4SLinus Torvalds 		pgoff_t *index, int tag, unsigned nr_pages)
8881da177e4SLinus Torvalds {
8891da177e4SLinus Torvalds 	pvec->nr = find_get_pages_tag(mapping, index, tag,
8901da177e4SLinus Torvalds 					nr_pages, pvec->pages);
8911da177e4SLinus Torvalds 	return pagevec_count(pvec);
8921da177e4SLinus Torvalds }
8937f285701SSteve French EXPORT_SYMBOL(pagevec_lookup_tag);
8941da177e4SLinus Torvalds 
8951da177e4SLinus Torvalds /*
8961da177e4SLinus Torvalds  * Perform any setup for the swap system
8971da177e4SLinus Torvalds  */
8981da177e4SLinus Torvalds void __init swap_setup(void)
8991da177e4SLinus Torvalds {
9004481374cSJan Beulich 	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
901e0bf68ddSPeter Zijlstra #ifdef CONFIG_SWAP
90233806f06SShaohua Li 	int i;
90333806f06SShaohua Li 
90433806f06SShaohua Li 	bdi_init(swapper_spaces[0].backing_dev_info);
90533806f06SShaohua Li 	for (i = 0; i < MAX_SWAPFILES; i++) {
90633806f06SShaohua Li 		spin_lock_init(&swapper_spaces[i].tree_lock);
90733806f06SShaohua Li 		INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
90833806f06SShaohua Li 	}
909e0bf68ddSPeter Zijlstra #endif
910e0bf68ddSPeter Zijlstra 
9111da177e4SLinus Torvalds 	/* Use a smaller cluster for small-memory machines */
9121da177e4SLinus Torvalds 	if (megs < 16)
9131da177e4SLinus Torvalds 		page_cluster = 2;
9141da177e4SLinus Torvalds 	else
9151da177e4SLinus Torvalds 		page_cluster = 3;
9161da177e4SLinus Torvalds 	/*
9171da177e4SLinus Torvalds 	 * Right now other parts of the system mean that we
9181da177e4SLinus Torvalds 	 * _really_ don't want to cluster much more.
9191da177e4SLinus Torvalds 	 */
9201da177e4SLinus Torvalds }
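
/*
 * Illustrative sketch, not part of the original file: page_cluster is an
 * exponent (see vm.page-cluster in Documentation/sysctl/vm.txt), so the
 * values chosen above correspond to 4-page and 8-page swap readahead
 * clusters.  The helper name is hypothetical.
 */
static inline unsigned long example_swap_cluster_pages(void)
{
	return 1UL << page_cluster;
}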