11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/swap_state.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 51da177e4SLinus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * Rewritten to use page cache, (C) 1998 Stephen Tweedie 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds #include <linux/module.h> 101da177e4SLinus Torvalds #include <linux/mm.h> 111da177e4SLinus Torvalds #include <linux/kernel_stat.h> 121da177e4SLinus Torvalds #include <linux/swap.h> 131da177e4SLinus Torvalds #include <linux/init.h> 141da177e4SLinus Torvalds #include <linux/pagemap.h> 151da177e4SLinus Torvalds #include <linux/buffer_head.h> 161da177e4SLinus Torvalds #include <linux/backing-dev.h> 17c484d410SHugh Dickins #include <linux/pagevec.h> 18b20a3503SChristoph Lameter #include <linux/migrate.h> 191da177e4SLinus Torvalds 201da177e4SLinus Torvalds #include <asm/pgtable.h> 211da177e4SLinus Torvalds 221da177e4SLinus Torvalds /* 231da177e4SLinus Torvalds * swapper_space is a fiction, retained to simplify the path through 241da177e4SLinus Torvalds * vmscan's shrink_list, to make sync_page look nicer, and to allow 251da177e4SLinus Torvalds * future use of radix_tree tags in the swap cache. 
 */
static const struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.sync_page	= block_sync_page,
	.set_page_dirty	= __set_page_dirty_nobuffers,
	.migratepage	= migrate_page,
};

/*
 * Swap-cache pages are neither dirty-accounted nor written back by the
 * generic writeback path (BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK):
 * they reach disk via swap_writepage() above.
 */
static struct backing_dev_info swap_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
	.unplug_io_fn	= swap_unplug_io_fn,
};

/*
 * The single global address_space covering all swap-cache pages.  Pages are
 * keyed in the radix tree by swp_entry_t.val, not by file offset.
 */
struct address_space swapper_space = {
	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
	.tree_lock	= RW_LOCK_UNLOCKED,
	.a_ops		= &swap_aops,
	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
	.backing_dev_info = &swap_backing_dev_info,
};

#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)

/*
 * Counters reported by show_swap_cache_info().  Bumped with plain
 * (non-atomic) increments via INC_CACHE_INFO(), so the values are
 * statistics, not exact counts.
 */
static struct {
	unsigned long add_total;	/* successful insertions */
	unsigned long del_total;	/* deletions */
	unsigned long find_success;	/* lookups that hit */
	unsigned long find_total;	/* all lookups */
	unsigned long noent_race;	/* swap entry freed before we added */
	unsigned long exist_race;	/* lost insertion race to another page */
} swap_cache_info;

/* Dump swap-cache statistics and free/total swap sizes (in kB). */
void show_swap_cache_info(void)
{
	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total,
		swap_cache_info.noent_race, swap_cache_info.exist_race);
	/* PAGE_SHIFT - 10 converts a page count to kilobytes */
	printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}

/*
 * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
		gfp_t gfp_mask)
{
	int error;

	BUG_ON(PageSwapCache(page));
	BUG_ON(PagePrivate(page));
	/*
	 * Preallocate the radix-tree node with the caller's gfp_mask before
	 * taking the tree lock, so the insertion itself cannot need to
	 * allocate while the lock is held.
	 */
	error = radix_tree_preload(gfp_mask);
	if (!error) {
		write_lock_irq(&swapper_space.tree_lock);
		error = radix_tree_insert(&swapper_space.page_tree,
						entry.val, page);
		if (!error) {
			page_cache_get(page);	/* swap-cache reference */
			SetPageLocked(page);
			SetPageSwapCache(page);
			/* page_private() now records the swap entry value */
			set_page_private(page, entry.val);
			total_swapcache_pages++;
			pagecache_acct(1);
		}
		write_unlock_irq(&swapper_space.tree_lock);
		radix_tree_preload_end();
	}
	return error;
}

/*
 * Like __add_to_swap_cache(), but also takes a reference on the swap entry
 * via swap_duplicate() and maintains the statistics counters.
 * Returns 0 on success, -ENOENT if the entry was freed before we could
 * duplicate it, or the __add_to_swap_cache error (-EEXIST / -ENOMEM).
 */
static int add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error;

	/* Grab a reference on the swap entry; fails if it was just freed. */
	if (!swap_duplicate(entry)) {
		INC_CACHE_INFO(noent_race);
		return -ENOENT;
	}
	error = __add_to_swap_cache(page, entry, GFP_KERNEL);
	/*
	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
	 */
	if (error) {
		swap_free(entry);	/* drop the reference taken above */
		if (error == -EEXIST)
			INC_CACHE_INFO(exist_race);
		return error;
	}
	INC_CACHE_INFO(add_total);
	return 0;
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page)
{
	/*
	 * Callers hold the page lock and swapper_space.tree_lock
	 * (see delete_from_swap_cache() below).
	 */
	BUG_ON(!PageLocked(page));
	BUG_ON(!PageSwapCache(page));
	BUG_ON(PageWriteback(page));
	BUG_ON(PagePrivate(page));

	/* page_private() holds the swp_entry_t.val this page is keyed on */
	radix_tree_delete(&swapper_space.page_tree, page_private(page));
	set_page_private(page, 0);
	ClearPageSwapCache(page);
	total_swapcache_pages--;
	pagecache_acct(-1);
	INC_CACHE_INFO(del_total);
}

/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 * @gfp_mask: allocation flags for the swap-cache insertion
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 *
 * Returns 1 on success; 0 if no swap slot could be allocated or the
 * radix-tree insertion failed with -ENOMEM.
 */
int add_to_swap(struct page * page, gfp_t gfp_mask)
{
	swp_entry_t entry;
	int err;

	BUG_ON(!PageLocked(page));

	for (;;) {
		/* Reserve a swap slot; entry.val == 0 means swap is full. */
		entry = get_swap_page();
		if (!entry.val)
			return 0;

		/*
		 * Radix-tree node allocations from PF_MEMALLOC contexts could
		 * completely exhaust the page allocator. __GFP_NOMEMALLOC
		 * stops emergency reserves from being allocated.
		 *
		 * TODO: this could cause a theoretical memory reclaim
		 * deadlock in the swap out path.
		 */
		/*
		 * Add it to the swap cache and mark it dirty
		 */
		err = __add_to_swap_cache(page, entry,
				gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);

		switch (err) {
		case 0:				/* Success */
			SetPageUptodate(page);
			SetPageDirty(page);
			INC_CACHE_INFO(add_total);
			return 1;
		case -EEXIST:
			/* Raced with "speculative" read_swap_cache_async */
			INC_CACHE_INFO(exist_race);
			swap_free(entry);
			continue;	/* retry with a fresh swap slot */
		default:
			/* -ENOMEM radix-tree allocation failure */
			swap_free(entry);
			return 0;
		}
	}
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;

	/* Remember the entry before the cache unhook clears page_private. */
	entry.val = page_private(page);

	write_lock_irq(&swapper_space.tree_lock);
	__delete_from_swap_cache(page);
	write_unlock_irq(&swapper_space.tree_lock);

	/* Drop the swap-map reference and the swap-cache page reference. */
	swap_free(entry);
	page_cache_release(page);
}

/*
 * Strange swizzling function only for use by shmem_writepage:
 * moves a page from the page cache into the swap cache.
 */
int move_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
	if (!err) {
		remove_from_page_cache(page);
		page_cache_release(page);	/* pagecache ref */
		/*
		 * Take the swap-map reference ourselves (the __ variant
		 * doesn't); failure here would mean the entry we just
		 * inserted under is invalid.
		 */
		if (!swap_duplicate(entry))
			BUG();
		SetPageDirty(page);
		INC_CACHE_INFO(add_total);
	} else if (err == -EEXIST)
		INC_CACHE_INFO(exist_race);
	return err;
}

/*
 * Strange swizzling function for shmem_getpage (and shmem_unuse):
 * moves a page from the swap cache back into the page cache.
 */
int move_from_swap_cache(struct page *page, unsigned long index,
		struct address_space *mapping)
{
	int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
	if (!err) {
		delete_from_swap_cache(page);
		/* shift page from clean_pages to dirty_pages list */
		ClearPageDirty(page);
		set_page_dirty(page);
	}
	return err;
}

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * Its ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * exclusive_swap_page() _with_ the lock.
 * 					- Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
	/* Trylock only: if someone else holds the page lock, just skip. */
	if (PageSwapCache(page) && !TestSetPageLocked(page)) {
		remove_exclusive_swap_page(page);
		unlock_page(page);
	}
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	page_cache_release(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them. They are removed from the LRU and freed if this is their last use.
2741da177e4SLinus Torvalds */ 2751da177e4SLinus Torvalds void free_pages_and_swap_cache(struct page **pages, int nr) 2761da177e4SLinus Torvalds { 2771da177e4SLinus Torvalds struct page **pagep = pages; 2781da177e4SLinus Torvalds 2791da177e4SLinus Torvalds lru_add_drain(); 2801da177e4SLinus Torvalds while (nr) { 281c484d410SHugh Dickins int todo = min(nr, PAGEVEC_SIZE); 2821da177e4SLinus Torvalds int i; 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds for (i = 0; i < todo; i++) 2851da177e4SLinus Torvalds free_swap_cache(pagep[i]); 2861da177e4SLinus Torvalds release_pages(pagep, todo, 0); 2871da177e4SLinus Torvalds pagep += todo; 2881da177e4SLinus Torvalds nr -= todo; 2891da177e4SLinus Torvalds } 2901da177e4SLinus Torvalds } 2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds /* 2931da177e4SLinus Torvalds * Lookup a swap entry in the swap cache. A found page will be returned 2941da177e4SLinus Torvalds * unlocked and with its refcount incremented - we rely on the kernel 2951da177e4SLinus Torvalds * lock getting page table operations atomic even if we drop the page 2961da177e4SLinus Torvalds * lock before returning. 2971da177e4SLinus Torvalds */ 2981da177e4SLinus Torvalds struct page * lookup_swap_cache(swp_entry_t entry) 2991da177e4SLinus Torvalds { 3001da177e4SLinus Torvalds struct page *page; 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds page = find_get_page(&swapper_space, entry.val); 3031da177e4SLinus Torvalds 3041da177e4SLinus Torvalds if (page) 3051da177e4SLinus Torvalds INC_CACHE_INFO(find_success); 3061da177e4SLinus Torvalds 3071da177e4SLinus Torvalds INC_CACHE_INFO(find_total); 3081da177e4SLinus Torvalds return page; 3091da177e4SLinus Torvalds } 3101da177e4SLinus Torvalds 3111da177e4SLinus Torvalds /* 3121da177e4SLinus Torvalds * Locate a page of swap in physical memory, reserving swap cache space 3131da177e4SLinus Torvalds * and reading the disk if it is not already cached. 
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 *
 * @vma and @addr are only used to place the newly allocated page
 * (alloc_page_vma); the returned page comes from the swap cache on a hit.
 */
struct page *read_swap_cache_async(swp_entry_t entry,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 * (Allocated at most once; reused across -EEXIST retries.)
		 */
		if (!new_page) {
			new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Associate the page with swap entry in the swap cache.
		 * May fail (-ENOENT) if swap entry has been freed since
		 * our caller observed it.  May fail (-EEXIST) if there
		 * is already a page associated with this entry in the
		 * swap cache: added by a racing read_swap_cache_async,
		 * or by try_to_swap_out (or shmem_writepage) re-using
		 * the just freed swap entry for an existing page.
		 * May fail (-ENOMEM) if radix-tree node allocation failed.
		 */
		err = add_to_swap_cache(new_page, entry);
		if (!err) {
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_active(new_page);
			swap_readpage(NULL, new_page);
			return new_page;
		}
	} while (err != -ENOENT && err != -ENOMEM);	/* retry on -EEXIST */

	/* Release the unused page; return the cache hit, or NULL. */
	if (new_page)
		page_cache_release(new_page);
	return found_page;
}