xref: /linux/mm/swap.c (revision 7a608572a282a74978e10fd6cd63090aebe29f5c)
/*
 *  linux/mm/swap.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * This file contains the default values for the operation of the
 * Linux VM subsystem. Fine-tuning documentation can be found in
 * Documentation/sysctl/vm.txt.
 * Started 18.12.91
 * Swap aging added 23.2.95, Stephen Tweedie.
 * Buffermem limits added 12.3.98, Rik van Riel.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm_inline.h>
#include <linux/buffer_head.h>	/* for try_to_release_page() */
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>

#include "internal.h"

/* How many pages do we try to swap or page in/out together? */
int page_cluster;

static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);

/*
 * This path almost never happens for VM activity - pages are normally
 * freed via pagevecs.  But it gets used by networking.
 */
static void __page_cache_release(struct page *page)
{
	if (PageLRU(page)) {
		unsigned long flags;
		struct zone *zone = page_zone(page);

		spin_lock_irqsave(&zone->lru_lock, flags);
		VM_BUG_ON(!PageLRU(page));
		__ClearPageLRU(page);
		del_page_from_lru(zone, page);
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	}
}

static void __put_single_page(struct page *page)
{
	__page_cache_release(page);
	free_hot_cold_page(page, 0);
}

static void __put_compound_page(struct page *page)
{
	compound_page_dtor *dtor;

	__page_cache_release(page);
	dtor = get_compound_page_dtor(page);
	(*dtor)(page);
}

static void put_compound_page(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/* __split_huge_page_refcount can run under us */
		struct page *page_head = page->first_page;
		smp_rmb();
		/*
		 * If PageTail is still set after smp_rmb() we can be sure
		 * that the page->first_page we read wasn't a dangling pointer.
		 * See __split_huge_page_refcount() smp_wmb().
		 */
		if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
			unsigned long flags;
			/*
			 * Verify that our page_head wasn't converted
			 * to a regular page before we got a
			 * reference on it.
			 */
			if (unlikely(!PageHead(page_head))) {
				/* PageHead is cleared after PageTail */
				smp_rmb();
				VM_BUG_ON(PageTail(page));
				goto out_put_head;
			}
			/*
			 * Only run compound_lock on a valid PageHead,
			 * after having it pinned with
			 * get_page_unless_zero() above.
			 */
			smp_mb();
			/* page_head wasn't a dangling pointer */
			flags = compound_lock_irqsave(page_head);
			if (unlikely(!PageTail(page))) {
				/* __split_huge_page_refcount run before us */
				compound_unlock_irqrestore(page_head, flags);
				VM_BUG_ON(PageHead(page_head));
			out_put_head:
				if (put_page_testzero(page_head))
					__put_single_page(page_head);
			out_put_single:
				if (put_page_testzero(page))
					__put_single_page(page);
				return;
			}
			VM_BUG_ON(page_head != page->first_page);
			/*
			 * We can release the refcount taken by
			 * get_page_unless_zero now that
			 * split_huge_page_refcount is blocked on the
			 * compound_lock.
			 */
			if (put_page_testzero(page_head))
				VM_BUG_ON(1);
			/* __split_huge_page_refcount will wait now */
			VM_BUG_ON(atomic_read(&page->_count) <= 0);
			atomic_dec(&page->_count);
			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
			compound_unlock_irqrestore(page_head, flags);
			if (put_page_testzero(page_head)) {
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
		} else {
			/* page_head is a dangling pointer */
			VM_BUG_ON(PageTail(page));
			goto out_put_single;
		}
	} else if (put_page_testzero(page)) {
		if (PageHead(page))
			__put_compound_page(page);
		else
			__put_single_page(page);
	}
}

void put_page(struct page *page)
{
	if (unlikely(PageCompound(page)))
		put_compound_page(page);
	else if (put_page_testzero(page))
		__put_single_page(page);
}
EXPORT_SYMBOL(put_page);
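
/*
 * Example (illustrative sketch, not part of the original file):
 * put_page() pairs with get_page() - dropping the last reference
 * frees the page:
 *
 *	get_page(page);
 *	... use the page ...
 *	put_page(page);		frees the page if this was the last ref
 */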

/**
 * put_pages_list() - release a list of pages
 * @pages: list of pages threaded on page->lru
 *
 * Release a list of pages which are strung together on page->lru.  Currently
 * used by read_cache_pages() and related error recovery code.
 */
void put_pages_list(struct list_head *pages)
{
	while (!list_empty(pages)) {
		struct page *victim;

		victim = list_entry(pages->prev, struct page, lru);
		list_del(&victim->lru);
		page_cache_release(victim);
	}
}
EXPORT_SYMBOL(put_pages_list);

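/*
 * Example (illustrative sketch, assuming the caller holds a reference
 * on each page): callers collect pages on a private list threaded
 * through page->lru and release the whole batch at once:
 *
 *	LIST_HEAD(pages_to_free);
 *
 *	list_add_tail(&page->lru, &pages_to_free);
 *	... gather more pages ...
 *	put_pages_list(&pages_to_free);
 */
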
static void pagevec_lru_move_fn(struct pagevec *pvec,
				void (*move_fn)(struct page *page, void *arg),
				void *arg)
{
	int i;
	struct zone *zone = NULL;
	unsigned long flags = 0;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);

		if (pagezone != zone) {
			if (zone)
				spin_unlock_irqrestore(&zone->lru_lock, flags);
			zone = pagezone;
			spin_lock_irqsave(&zone->lru_lock, flags);
		}

		(*move_fn)(page, arg);
	}
	if (zone)
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
	pagevec_reinit(pvec);
}

static void pagevec_move_tail_fn(struct page *page, void *arg)
{
	int *pgmoved = arg;
	struct zone *zone = page_zone(page);

	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		int lru = page_lru_base_type(page);
		list_move_tail(&page->lru, &zone->lru[lru].list);
		(*pgmoved)++;
	}
}

/*
 * pagevec_move_tail() must be called with IRQ disabled.
 * Otherwise this may cause nasty races.
 */
static void pagevec_move_tail(struct pagevec *pvec)
{
	int pgmoved = 0;

	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
	__count_vm_events(PGROTATED, pgmoved);
}

/*
 * Writeback is about to end against a page which has been marked for immediate
 * reclaim.  If it still appears to be reclaimable, move it to the tail of the
 * inactive list.
 */
void rotate_reclaimable_page(struct page *page)
{
	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
	    !PageUnevictable(page) && PageLRU(page)) {
		struct pagevec *pvec;
		unsigned long flags;

		page_cache_get(page);
		local_irq_save(flags);
		pvec = &__get_cpu_var(lru_rotate_pvecs);
		if (!pagevec_add(pvec, page))
			pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}
}
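
/*
 * Example (illustrative sketch of the typical caller, modelled on
 * end_page_writeback() in mm/filemap.c): writeback completion tests
 * the PG_reclaim bit and rotates the page for quick reclaim:
 *
 *	if (TestClearPageReclaim(page))
 *		rotate_reclaimable_page(page);
 */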

static void update_page_reclaim_stat(struct zone *zone, struct page *page,
				     int file, int rotated)
{
	struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
	struct zone_reclaim_stat *memcg_reclaim_stat;

	memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);

	reclaim_stat->recent_scanned[file]++;
	if (rotated)
		reclaim_stat->recent_rotated[file]++;

	if (!memcg_reclaim_stat)
		return;

	memcg_reclaim_stat->recent_scanned[file]++;
	if (rotated)
		memcg_reclaim_stat->recent_rotated[file]++;
}

/*
 * FIXME: speed this up?
 */
void activate_page(struct page *page)
{
	struct zone *zone = page_zone(page);

	spin_lock_irq(&zone->lru_lock);
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		int file = page_is_file_cache(page);
		int lru = page_lru_base_type(page);
		del_page_from_lru_list(zone, page, lru);

		SetPageActive(page);
		lru += LRU_ACTIVE;
		add_page_to_lru_list(zone, page, lru);
		__count_vm_event(PGACTIVATE);

		update_page_reclaim_stat(zone, page, file, 1);
	}
	spin_unlock_irq(&zone->lru_lock);
}

/*
 * Mark a page as having seen activity.
 *
 * inactive,unreferenced	->	inactive,referenced
 * inactive,referenced		->	active,unreferenced
 * active,unreferenced		->	active,referenced
 */
void mark_page_accessed(struct page *page)
{
	if (!PageActive(page) && !PageUnevictable(page) &&
			PageReferenced(page) && PageLRU(page)) {
		activate_page(page);
		ClearPageReferenced(page);
	} else if (!PageReferenced(page)) {
		SetPageReferenced(page);
	}
}

EXPORT_SYMBOL(mark_page_accessed);

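/*
 * Example (illustrative walk through the transitions above, assuming
 * the page is on an LRU list and evictable): two accesses promote an
 * inactive page to the active list:
 *
 *	mark_page_accessed(page);	inactive,unreferenced -> inactive,referenced
 *	mark_page_accessed(page);	inactive,referenced   -> active,unreferenced
 */
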
void __lru_cache_add(struct page *page, enum lru_list lru)
{
	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];

	page_cache_get(page);
	/* batch in the per-cpu pagevec; drain to the LRU list when full */
	if (!pagevec_add(pvec, page))
		____pagevec_lru_add(pvec, lru);
	put_cpu_var(lru_add_pvecs);
}
EXPORT_SYMBOL(__lru_cache_add);

/**
 * lru_cache_add_lru - add a page to a page list
 * @page: the page to be added to the LRU.
 * @lru: the LRU list to which the page is added.
 */
void lru_cache_add_lru(struct page *page, enum lru_list lru)
{
	if (PageActive(page)) {
		VM_BUG_ON(PageUnevictable(page));
		ClearPageActive(page);
	} else if (PageUnevictable(page)) {
		VM_BUG_ON(PageActive(page));
		ClearPageUnevictable(page);
	}

	VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
	__lru_cache_add(page, lru);
}

/**
 * add_page_to_unevictable_list - add a page to the unevictable list
 * @page:  the page to be added to the unevictable list
 *
 * Add page directly to its zone's unevictable list.  To avoid races with
 * tasks that might be making the page evictable, through e.g. munlock,
 * munmap or exit, while it's not on the lru, we want to add the page
 * while it's locked or otherwise "invisible" to other tasks.  This is
 * difficult to do when using the pagevec cache, so bypass that.
 */
void add_page_to_unevictable_list(struct page *page)
{
	struct zone *zone = page_zone(page);

	spin_lock_irq(&zone->lru_lock);
	SetPageUnevictable(page);
	SetPageLRU(page);
	add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
	spin_unlock_irq(&zone->lru_lock);
}

/*
 * Drain pages out of the cpu's pagevecs.
 * Either "cpu" is the current CPU, and preemption has already been
 * disabled; or "cpu" is being hot-unplugged, and is already dead.
 */
static void drain_cpu_pagevecs(int cpu)
{
	struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
	struct pagevec *pvec;
	int lru;

	for_each_lru(lru) {
		pvec = &pvecs[lru - LRU_BASE];
		if (pagevec_count(pvec))
			____pagevec_lru_add(pvec, lru);
	}

	pvec = &per_cpu(lru_rotate_pvecs, cpu);
	if (pagevec_count(pvec)) {
		unsigned long flags;

		/* No harm done if a racing interrupt already did this */
		local_irq_save(flags);
		pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}
}

void lru_add_drain(void)
{
	drain_cpu_pagevecs(get_cpu());
	put_cpu();
}

static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
	lru_add_drain();
}

/*
 * Returns 0 for success
 */
int lru_add_drain_all(void)
{
	return schedule_on_each_cpu(lru_add_drain_per_cpu);
}

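/*
 * Example (illustrative, not from the original file): a caller that
 * needs every deferred page visible on the LRU lists before scanning
 * them drains all CPUs and checks for failure:
 *
 *	int err = lru_add_drain_all();
 *	if (err)
 *		return err;
 */
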
/*
 * Batched page_cache_release().  Decrement the reference count on all the
 * passed pages.  If it fell to zero then remove the page from the LRU and
 * free it.
 *
 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
 * for the remainder of the operation.
 *
 * The locking in this function is against shrink_inactive_list(): we recheck
 * the page count inside the lock to see whether shrink_inactive_list()
 * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
 * will free it.
 */
void release_pages(struct page **pages, int nr, int cold)
{
	int i;
	struct pagevec pages_to_free;
	struct zone *zone = NULL;
	unsigned long uninitialized_var(flags);

	pagevec_init(&pages_to_free, cold);
	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		if (unlikely(PageCompound(page))) {
			if (zone) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				zone = NULL;
			}
			put_compound_page(page);
			continue;
		}

		if (!put_page_testzero(page))
			continue;

		if (PageLRU(page)) {
			struct zone *pagezone = page_zone(page);

			if (pagezone != zone) {
				if (zone)
					spin_unlock_irqrestore(&zone->lru_lock,
									flags);
				zone = pagezone;
				spin_lock_irqsave(&zone->lru_lock, flags);
			}
			VM_BUG_ON(!PageLRU(page));
			__ClearPageLRU(page);
			del_page_from_lru(zone, page);
		}

		if (!pagevec_add(&pages_to_free, page)) {
			if (zone) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				zone = NULL;
			}
			__pagevec_free(&pages_to_free);
			pagevec_reinit(&pages_to_free);
		}
	}
	if (zone)
		spin_unlock_irqrestore(&zone->lru_lock, flags);

	pagevec_free(&pages_to_free);
}
EXPORT_SYMBOL(release_pages);

/*
 * The pages which we're about to release may be in the deferred lru-addition
 * queues.  That would prevent them from really being freed right now.  That's
 * OK from a correctness point of view but is inefficient - those pages may be
 * cache-warm and we want to give them back to the page allocator ASAP.
 *
 * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
 * and __pagevec_lru_add_active() call release_pages() directly to avoid
 * mutual recursion.
 */
void __pagevec_release(struct pagevec *pvec)
{
	lru_add_drain();
	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
	pagevec_reinit(pvec);
}

EXPORT_SYMBOL(__pagevec_release);

/* used by __split_huge_page_refcount() */
void lru_add_page_tail(struct zone *zone,
		       struct page *page, struct page *page_tail)
{
	int active;
	enum lru_list lru;
	const int file = 0;
	struct list_head *head;

	VM_BUG_ON(!PageHead(page));
	VM_BUG_ON(PageCompound(page_tail));
	VM_BUG_ON(PageLRU(page_tail));
	VM_BUG_ON(!spin_is_locked(&zone->lru_lock));

	SetPageLRU(page_tail);

	if (page_evictable(page_tail, NULL)) {
		if (PageActive(page)) {
			SetPageActive(page_tail);
			active = 1;
			lru = LRU_ACTIVE_ANON;
		} else {
			active = 0;
			lru = LRU_INACTIVE_ANON;
		}
		update_page_reclaim_stat(zone, page_tail, file, active);
		if (likely(PageLRU(page)))
			head = page->lru.prev;
		else
			head = &zone->lru[lru].list;
		__add_page_to_lru_list(zone, page_tail, lru, head);
	} else {
		SetPageUnevictable(page_tail);
		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
	}
}

static void ____pagevec_lru_add_fn(struct page *page, void *arg)
{
	enum lru_list lru = (enum lru_list)arg;
	struct zone *zone = page_zone(page);
	int file = is_file_lru(lru);
	int active = is_active_lru(lru);

	VM_BUG_ON(PageActive(page));
	VM_BUG_ON(PageUnevictable(page));
	VM_BUG_ON(PageLRU(page));

	SetPageLRU(page);
	if (active)
		SetPageActive(page);
	update_page_reclaim_stat(zone, page, file, active);
	add_page_to_lru_list(zone, page, lru);
}

/*
 * Add the passed pages to the LRU, then drop the caller's refcount
 * on them.  Reinitialises the caller's pagevec.
 */
void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
{
	VM_BUG_ON(is_unevictable_lru(lru));

	pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
}

EXPORT_SYMBOL(____pagevec_lru_add);

/*
 * Try to drop buffers from the pages in a pagevec
 */
void pagevec_strip(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];

		if (page_has_private(page) && trylock_page(page)) {
			if (page_has_private(page))
				try_to_release_page(page, 0);
			unlock_page(page);
		}
	}
}

/**
 * pagevec_lookup - gang pagecache lookup
 * @pvec:	Where the resulting pages are placed
 * @mapping:	The address_space to search
 * @start:	The starting page index
 * @nr_pages:	The maximum number of pages
 *
 * pagevec_lookup() will search for and return a group of up to @nr_pages pages
 * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
 * reference against the pages in @pvec.
 *
 * The search returns a group of mapping-contiguous pages with ascending
 * indexes.  There may be holes in the indices due to not-present pages.
 *
 * pagevec_lookup() returns the number of pages which were found.
 */
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
		pgoff_t start, unsigned nr_pages)
{
	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
	return pagevec_count(pvec);
}

EXPORT_SYMBOL(pagevec_lookup);

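/*
 * Example (illustrative sketch of the common gang-lookup loop, as used
 * by e.g. truncation code): walk all pages in a mapping a pagevec at a
 * time, dropping the references pagevec_lookup() took:
 *
 *	struct pagevec pvec;
 *	pgoff_t index = 0;
 *
 *	pagevec_init(&pvec, 0);
 *	while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
 *		int i;
 *		for (i = 0; i < pagevec_count(&pvec); i++) {
 *			struct page *page = pvec.pages[i];
 *			index = page->index + 1;
 *			... operate on page ...
 *		}
 *		pagevec_release(&pvec);
 *	}
 */
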
unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
		pgoff_t *index, int tag, unsigned nr_pages)
{
	pvec->nr = find_get_pages_tag(mapping, index, tag,
					nr_pages, pvec->pages);
	return pagevec_count(pvec);
}

EXPORT_SYMBOL(pagevec_lookup_tag);

/*
 * Perform any setup for the swap system
 */
void __init swap_setup(void)
{
	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);

#ifdef CONFIG_SWAP
	bdi_init(swapper_space.backing_dev_info);
#endif

	/* Use a smaller cluster for small-memory machines */
	if (megs < 16)
		page_cluster = 2;
	else
		page_cluster = 3;
	/*
	 * Right now other parts of the system mean that we
	 * _really_ don't want to cluster much more
	 */
}
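
/*
 * Illustrative note (an assumption, not from the original file):
 * page_cluster is a shift count - swapin readahead reads up to
 * 1 << page_cluster pages at a time (tunable via the vm.page-cluster
 * sysctl).  With the defaults chosen above:
 *
 *	megs >= 16:  page_cluster = 3  ->  1 << 3 = 8 pages per cluster
 *	megs <  16:  page_cluster = 2  ->  1 << 2 = 4 pages per cluster
 */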