xref: /linux/mm/swap_state.c (revision b8072f099b7829a6ff3eba618e1d079a81f753f8)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *  linux/mm/swap_state.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
51da177e4SLinus Torvalds  *  Swap reorganised 29.12.95, Stephen Tweedie
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
81da177e4SLinus Torvalds  */
91da177e4SLinus Torvalds #include <linux/module.h>
101da177e4SLinus Torvalds #include <linux/mm.h>
111da177e4SLinus Torvalds #include <linux/kernel_stat.h>
121da177e4SLinus Torvalds #include <linux/swap.h>
131da177e4SLinus Torvalds #include <linux/init.h>
141da177e4SLinus Torvalds #include <linux/pagemap.h>
151da177e4SLinus Torvalds #include <linux/buffer_head.h>
161da177e4SLinus Torvalds #include <linux/backing-dev.h>
171da177e4SLinus Torvalds 
181da177e4SLinus Torvalds #include <asm/pgtable.h>
191da177e4SLinus Torvalds 
/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_list, to make sync_page look nicer, and to allow
 * future use of radix_tree tags in the swap cache.
 */
static struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,	/* write a swapcache page to its swap slot */
	.sync_page	= block_sync_page,
	.set_page_dirty	= __set_page_dirty_nobuffers,
};
301da177e4SLinus Torvalds 
/*
 * Backing device for the swap "address space": swap pages are never
 * dirty-accounted or written back by the flusher threads.
 */
static struct backing_dev_info swap_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
	.unplug_io_fn	= swap_unplug_io_fn,
};
351da177e4SLinus Torvalds 
/*
 * The single global address_space that indexes all swap-cache pages,
 * keyed by swp_entry_t.val in its radix tree.
 */
struct address_space swapper_space = {
	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
	.tree_lock	= RW_LOCK_UNLOCKED,
	.a_ops		= &swap_aops,
	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
	.backing_dev_info = &swap_backing_dev_info,
};
EXPORT_SYMBOL(swapper_space);
441da177e4SLinus Torvalds 
/* Bump one counter in swap_cache_info; not atomic — stats are best-effort. */
#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)

/* Swap-cache statistics, reported by show_swap_cache_info(). */
static struct {
	unsigned long add_total;	/* successful insertions */
	unsigned long del_total;	/* deletions */
	unsigned long find_success;	/* lookups that found a page */
	unsigned long find_total;	/* all lookups */
	unsigned long noent_race;	/* entry freed before we could add */
	unsigned long exist_race;	/* lost race with a concurrent add */
} swap_cache_info;
551da177e4SLinus Torvalds 
/* Dump swap-cache statistics and free/total swap sizes to the kernel log. */
void show_swap_cache_info(void)
{
	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total,
		swap_cache_info.noent_race, swap_cache_info.exist_race);
	/* pages -> kB: shift by (PAGE_SHIFT - 10) */
	printk("Free swap  = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}
651da177e4SLinus Torvalds 
/*
 * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 *
 * Returns 0 on success, -EEXIST if the entry already has a page in the
 * tree, or a radix-tree preload/insert error.  On success the page is
 * left locked with an extra reference held by the swap cache.
 */
static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
			       gfp_t gfp_mask)
{
	int error;

	/* Caller must hand us a page that is not yet in the swap cache. */
	BUG_ON(PageSwapCache(page));
	BUG_ON(PagePrivate(page));
	/*
	 * Preload radix-tree nodes before taking tree_lock: the insert
	 * below runs with interrupts off and must not allocate.
	 */
	error = radix_tree_preload(gfp_mask);
	if (!error) {
		write_lock_irq(&swapper_space.tree_lock);
		error = radix_tree_insert(&swapper_space.page_tree,
						entry.val, page);
		if (!error) {
			page_cache_get(page);	/* swap cache's reference */
			SetPageLocked(page);
			SetPageSwapCache(page);
			set_page_private(page, entry.val);
			total_swapcache_pages++;
			pagecache_acct(1);
		}
		write_unlock_irq(&swapper_space.tree_lock);
		radix_tree_preload_end();
	}
	return error;
}
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds static int add_to_swap_cache(struct page *page, swp_entry_t entry)
971da177e4SLinus Torvalds {
981da177e4SLinus Torvalds 	int error;
991da177e4SLinus Torvalds 
1001da177e4SLinus Torvalds 	if (!swap_duplicate(entry)) {
1011da177e4SLinus Torvalds 		INC_CACHE_INFO(noent_race);
1021da177e4SLinus Torvalds 		return -ENOENT;
1031da177e4SLinus Torvalds 	}
1041da177e4SLinus Torvalds 	error = __add_to_swap_cache(page, entry, GFP_KERNEL);
1051da177e4SLinus Torvalds 	/*
1061da177e4SLinus Torvalds 	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
1071da177e4SLinus Torvalds 	 */
1081da177e4SLinus Torvalds 	if (error) {
1091da177e4SLinus Torvalds 		swap_free(entry);
1101da177e4SLinus Torvalds 		if (error == -EEXIST)
1111da177e4SLinus Torvalds 			INC_CACHE_INFO(exist_race);
1121da177e4SLinus Torvalds 		return error;
1131da177e4SLinus Torvalds 	}
1141da177e4SLinus Torvalds 	INC_CACHE_INFO(add_total);
1151da177e4SLinus Torvalds 	return 0;
1161da177e4SLinus Torvalds }
1171da177e4SLinus Torvalds 
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 *
 * Caller holds swapper_space.tree_lock; the page's swap entry is read
 * from page_private(page) before it is cleared.
 */
void __delete_from_swap_cache(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(!PageSwapCache(page));
	BUG_ON(PageWriteback(page));
	BUG_ON(PagePrivate(page));

	radix_tree_delete(&swapper_space.page_tree, page_private(page));
	set_page_private(page, 0);
	ClearPageSwapCache(page);
	total_swapcache_pages--;
	pagecache_acct(-1);
	INC_CACHE_INFO(del_total);
}
1361da177e4SLinus Torvalds 
1371da177e4SLinus Torvalds /**
1381da177e4SLinus Torvalds  * add_to_swap - allocate swap space for a page
1391da177e4SLinus Torvalds  * @page: page we want to move to swap
1401da177e4SLinus Torvalds  *
1411da177e4SLinus Torvalds  * Allocate swap space for the page and add the page to the
1421da177e4SLinus Torvalds  * swap cache.  Caller needs to hold the page lock.
1431da177e4SLinus Torvalds  */
1441da177e4SLinus Torvalds int add_to_swap(struct page * page)
1451da177e4SLinus Torvalds {
1461da177e4SLinus Torvalds 	swp_entry_t entry;
1471da177e4SLinus Torvalds 	int err;
1481da177e4SLinus Torvalds 
1491da177e4SLinus Torvalds 	if (!PageLocked(page))
1501da177e4SLinus Torvalds 		BUG();
1511da177e4SLinus Torvalds 
1521da177e4SLinus Torvalds 	for (;;) {
1531da177e4SLinus Torvalds 		entry = get_swap_page();
1541da177e4SLinus Torvalds 		if (!entry.val)
1551da177e4SLinus Torvalds 			return 0;
1561da177e4SLinus Torvalds 
157bd53b714SNick Piggin 		/*
158bd53b714SNick Piggin 		 * Radix-tree node allocations from PF_MEMALLOC contexts could
159bd53b714SNick Piggin 		 * completely exhaust the page allocator. __GFP_NOMEMALLOC
160bd53b714SNick Piggin 		 * stops emergency reserves from being allocated.
1611da177e4SLinus Torvalds 		 *
162bd53b714SNick Piggin 		 * TODO: this could cause a theoretical memory reclaim
163bd53b714SNick Piggin 		 * deadlock in the swap out path.
1641da177e4SLinus Torvalds 		 */
1651da177e4SLinus Torvalds 		/*
1661da177e4SLinus Torvalds 		 * Add it to the swap cache and mark it dirty
1671da177e4SLinus Torvalds 		 */
168bd53b714SNick Piggin 		err = __add_to_swap_cache(page, entry,
169bd53b714SNick Piggin 				GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN);
1701da177e4SLinus Torvalds 
1711da177e4SLinus Torvalds 		switch (err) {
1721da177e4SLinus Torvalds 		case 0:				/* Success */
1731da177e4SLinus Torvalds 			SetPageUptodate(page);
1741da177e4SLinus Torvalds 			SetPageDirty(page);
1751da177e4SLinus Torvalds 			INC_CACHE_INFO(add_total);
1761da177e4SLinus Torvalds 			return 1;
1771da177e4SLinus Torvalds 		case -EEXIST:
1781da177e4SLinus Torvalds 			/* Raced with "speculative" read_swap_cache_async */
1791da177e4SLinus Torvalds 			INC_CACHE_INFO(exist_race);
1801da177e4SLinus Torvalds 			swap_free(entry);
1811da177e4SLinus Torvalds 			continue;
1821da177e4SLinus Torvalds 		default:
1831da177e4SLinus Torvalds 			/* -ENOMEM radix-tree allocation failure */
1841da177e4SLinus Torvalds 			swap_free(entry);
1851da177e4SLinus Torvalds 			return 0;
1861da177e4SLinus Torvalds 		}
1871da177e4SLinus Torvalds 	}
1881da177e4SLinus Torvalds }
1891da177e4SLinus Torvalds 
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;

	/* Snapshot the entry before __delete_from_swap_cache clears it. */
	entry.val = page_private(page);

	write_lock_irq(&swapper_space.tree_lock);
	__delete_from_swap_cache(page);
	write_unlock_irq(&swapper_space.tree_lock);

	swap_free(entry);		/* drop the swap cache's entry ref */
	page_cache_release(page);	/* drop the swap cache's page ref */
}
2091da177e4SLinus Torvalds 
2101da177e4SLinus Torvalds /*
2111da177e4SLinus Torvalds  * Strange swizzling function only for use by shmem_writepage
2121da177e4SLinus Torvalds  */
2131da177e4SLinus Torvalds int move_to_swap_cache(struct page *page, swp_entry_t entry)
2141da177e4SLinus Torvalds {
2151da177e4SLinus Torvalds 	int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
2161da177e4SLinus Torvalds 	if (!err) {
2171da177e4SLinus Torvalds 		remove_from_page_cache(page);
2181da177e4SLinus Torvalds 		page_cache_release(page);	/* pagecache ref */
2191da177e4SLinus Torvalds 		if (!swap_duplicate(entry))
2201da177e4SLinus Torvalds 			BUG();
2211da177e4SLinus Torvalds 		SetPageDirty(page);
2221da177e4SLinus Torvalds 		INC_CACHE_INFO(add_total);
2231da177e4SLinus Torvalds 	} else if (err == -EEXIST)
2241da177e4SLinus Torvalds 		INC_CACHE_INFO(exist_race);
2251da177e4SLinus Torvalds 	return err;
2261da177e4SLinus Torvalds }
2271da177e4SLinus Torvalds 
2281da177e4SLinus Torvalds /*
2291da177e4SLinus Torvalds  * Strange swizzling function for shmem_getpage (and shmem_unuse)
2301da177e4SLinus Torvalds  */
2311da177e4SLinus Torvalds int move_from_swap_cache(struct page *page, unsigned long index,
2321da177e4SLinus Torvalds 		struct address_space *mapping)
2331da177e4SLinus Torvalds {
2341da177e4SLinus Torvalds 	int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
2351da177e4SLinus Torvalds 	if (!err) {
2361da177e4SLinus Torvalds 		delete_from_swap_cache(page);
2371da177e4SLinus Torvalds 		/* shift page from clean_pages to dirty_pages list */
2381da177e4SLinus Torvalds 		ClearPageDirty(page);
2391da177e4SLinus Torvalds 		set_page_dirty(page);
2401da177e4SLinus Torvalds 	}
2411da177e4SLinus Torvalds 	return err;
2421da177e4SLinus Torvalds }
2431da177e4SLinus Torvalds 
/*
 * If we are the only user, then try to free up the swap cache.
 *
 * Its ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * exclusive_swap_page() _with_ the lock.
 * 					- Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
	if (!PageSwapCache(page))
		return;
	if (TestSetPageLocked(page))
		return;		/* someone else holds the lock; let them deal */

	remove_exclusive_swap_page(page);
	unlock_page(page);
}
2591da177e4SLinus Torvalds 
/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);		/* drop swap-cache hold first */
	page_cache_release(page);	/* then the caller's reference */
}
2691da177e4SLinus Torvalds 
/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	struct page **cursor = pages;
	int batch = 16;

	lru_add_drain();
	while (nr) {
		int count = min(batch, nr);
		int i;

		/* Drop swap-cache holds for this batch, then release it. */
		for (i = 0; i < count; i++)
			free_swap_cache(cursor[i]);
		release_pages(cursor, count, 0);

		cursor += count;
		nr -= count;
	}
}
2911da177e4SLinus Torvalds 
2921da177e4SLinus Torvalds /*
2931da177e4SLinus Torvalds  * Lookup a swap entry in the swap cache. A found page will be returned
2941da177e4SLinus Torvalds  * unlocked and with its refcount incremented - we rely on the kernel
2951da177e4SLinus Torvalds  * lock getting page table operations atomic even if we drop the page
2961da177e4SLinus Torvalds  * lock before returning.
2971da177e4SLinus Torvalds  */
2981da177e4SLinus Torvalds struct page * lookup_swap_cache(swp_entry_t entry)
2991da177e4SLinus Torvalds {
3001da177e4SLinus Torvalds 	struct page *page;
3011da177e4SLinus Torvalds 
3021da177e4SLinus Torvalds 	page = find_get_page(&swapper_space, entry.val);
3031da177e4SLinus Torvalds 
3041da177e4SLinus Torvalds 	if (page)
3051da177e4SLinus Torvalds 		INC_CACHE_INFO(find_success);
3061da177e4SLinus Torvalds 
3071da177e4SLinus Torvalds 	INC_CACHE_INFO(find_total);
3081da177e4SLinus Torvalds 	return page;
3091da177e4SLinus Torvalds }
3101da177e4SLinus Torvalds 
/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 *
 * @vma/@addr are used only for NUMA placement of a newly allocated page.
 * Returns the page with a reference held, or NULL on failure.
 */
struct page *read_swap_cache_async(swp_entry_t entry,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 * Allocated once and reused across -EEXIST retries.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Associate the page with swap entry in the swap cache.
		 * May fail (-ENOENT) if swap entry has been freed since
		 * our caller observed it.  May fail (-EEXIST) if there
		 * is already a page associated with this entry in the
		 * swap cache: added by a racing read_swap_cache_async,
		 * or by try_to_swap_out (or shmem_writepage) re-using
		 * the just freed swap entry for an existing page.
		 * May fail (-ENOMEM) if radix-tree node allocation failed.
		 */
		err = add_to_swap_cache(new_page, entry);
		if (!err) {
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_active(new_page);
			swap_readpage(NULL, new_page);
			return new_page;
		}
	} while (err != -ENOENT && err != -ENOMEM);	/* retry only on -EEXIST */

	/* Failed or found in cache: drop the unused speculative page. */
	if (new_page)
		page_cache_release(new_page);
	return found_page;
}
367