/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>

#include <asm/pgtable.h>

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
 */
static const struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.set_page_dirty	= swap_set_page_dirty,
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
};

struct address_space *swapper_spaces[MAX_SWAPFILES];
static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
bool swap_vma_readahead = true;

#define SWAP_RA_MAX_ORDER_DEFAULT	3

static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;

#define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
#define SWAP_RA_WIN_MASK	(~PAGE_MASK & ~SWAP_RA_HITS_MASK)

#define SWAP_RA_HITS(v)		((v) & SWAP_RA_HITS_MASK)
#define SWAP_RA_WIN(v)		(((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT)
#define SWAP_RA_ADDR(v)		((v) & PAGE_MASK)

#define SWAP_RA_VAL(addr, win, hits)				\
	(((addr) & PAGE_MASK) |					\
	 (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) |	\
	 ((hits) & SWAP_RA_HITS_MASK))

/* Initial readahead hits is 4 to start up with a small window */
#define GET_SWAP_RA_VAL(vma)					\
	(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)

#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
#define ADD_CACHE_INFO(x, nr)	do { swap_cache_info.x += (nr); } while (0)

static struct {
	unsigned long add_total;
	unsigned long del_total;
	unsigned long find_success;
	unsigned long find_total;
} swap_cache_info;

unsigned long total_swapcache_pages(void)
{
	unsigned int i, j, nr;
	unsigned long ret = 0;
	struct address_space *spaces;

	rcu_read_lock();
	for (i = 0; i < MAX_SWAPFILES; i++) {
		/*
		 * The corresponding entries in nr_swapper_spaces and
		 * swapper_spaces will be reused only after at least
		 * one grace period.  So it is impossible for them
		 * to belong to different usages.
		 */
		nr = nr_swapper_spaces[i];
		spaces = rcu_dereference(swapper_spaces[i]);
		if (!nr || !spaces)
			continue;
		for (j = 0; j < nr; j++)
			ret += spaces[j].nrpages;
	}
	rcu_read_unlock();
	return ret;
}

static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);

void show_swap_cache_info(void)
{
	printk("%lu pages in swap cache\n", total_swapcache_pages());
	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total);
	printk("Free swap  = %ldkB\n",
		get_nr_swap_pages() << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}

/*
 * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
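 * The caller is expected to have already marked the swap entry with
 * SWAP_HAS_CACHE, which is why the insertion can never hit -EEXIST
 * (see the error path below).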
 */
int __add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error, i, nr = hpage_nr_pages(page);
	struct address_space *address_space;
	pgoff_t idx = swp_offset(entry);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_ref_add(page, nr);
	SetPageSwapCache(page);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	for (i = 0; i < nr; i++) {
		set_page_private(page + i, entry.val + i);
		error = radix_tree_insert(&address_space->page_tree,
					  idx + i, page + i);
		if (unlikely(error))
			break;
	}
	if (likely(!error)) {
		address_space->nrpages += nr;
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
		ADD_CACHE_INFO(add_total, nr);
	} else {
		/*
		 * Only the context which has set the SWAP_HAS_CACHE flag
		 * would call add_to_swap_cache().
		 * So add_to_swap_cache() doesn't return -EEXIST.
		 */
		VM_BUG_ON(error == -EEXIST);
		set_page_private(page + i, 0UL);
		while (i--) {
			radix_tree_delete(&address_space->page_tree, idx + i);
			set_page_private(page + i, 0UL);
		}
		ClearPageSwapCache(page);
		page_ref_sub(page, nr);
	}
	spin_unlock_irq(&address_space->tree_lock);

	return error;
}

int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
{
	int error;

	error = radix_tree_maybe_preload_order(gfp_mask, compound_order(page));
	if (!error) {
		error = __add_to_swap_cache(page, entry);
		radix_tree_preload_end();
	}
	return error;
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
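 * The caller must also hold the swap address_space's tree_lock;
 * delete_from_swap_cache() below is the wrapper that takes it.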
 */
void __delete_from_swap_cache(struct page *page)
{
	struct address_space *address_space;
	int i, nr = hpage_nr_pages(page);
	swp_entry_t entry;
	pgoff_t idx;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	VM_BUG_ON_PAGE(PageWriteback(page), page);

	entry.val = page_private(page);
	address_space = swap_address_space(entry);
	idx = swp_offset(entry);
	for (i = 0; i < nr; i++) {
		radix_tree_delete(&address_space->page_tree, idx + i);
		set_page_private(page + i, 0);
	}
	ClearPageSwapCache(page);
	address_space->nrpages -= nr;
	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	ADD_CACHE_INFO(del_total, nr);
}

/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 */
int add_to_swap(struct page *page)
{
	swp_entry_t entry;
	int err;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageUptodate(page), page);

	entry = get_swap_page(page);
	if (!entry.val)
		return 0;

	if (mem_cgroup_try_charge_swap(page, entry))
		goto fail;

	/*
	 * Radix-tree node allocations from PF_MEMALLOC contexts could
	 * completely exhaust the page allocator.  __GFP_NOMEMALLOC
	 * stops emergency reserves from being allocated.
	 *
	 * TODO: this could cause a theoretical memory reclaim
	 * deadlock in the swap out path.
	 */
	/*
	 * Add it to the swap cache.
	 */
	err = add_to_swap_cache(page, entry,
			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
	/* -ENOMEM radix-tree allocation failure */
	if (err)
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		goto fail;
	/*
	 * Normally the page will be dirtied in unmap because its pte should be
	 * dirty.  A special case is an MADV_FREE page.  The page's pte could
	 * have the dirty bit cleared while the page's SwapBacked bit is still
	 * set, because clearing the dirty bit and the SwapBacked bit is not
	 * protected by any lock.  For such a page, unmap will not set the
	 * dirty bit, so page reclaim will not write the page out.  This can
	 * cause data corruption when the page is swapped in later.  Always
	 * setting the dirty bit for the page solves the problem.
	 */
	set_page_dirty(page);

	return 1;

fail:
	put_swap_page(page, entry);
	return 0;
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;
	struct address_space *address_space;

	entry.val = page_private(page);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	__delete_from_swap_cache(page);
	spin_unlock_irq(&address_space->tree_lock);

	put_swap_page(page, entry);
	page_ref_sub(page, hpage_nr_pages(page));
}

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * It's ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * try_to_free_swap() _with_ the lock.
 * 					- Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
		try_to_free_swap(page);
		unlock_page(page);
	}
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	if (!is_huge_zero_page(page))
		put_page(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	struct page **pagep = pages;
	int i;

	lru_add_drain();
	for (i = 0; i < nr; i++)
		free_swap_cache(pagep[i]);
	release_pages(pagep, nr, false);
}

/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
			       unsigned long addr)
{
	struct page *page;
	unsigned long ra_info;
	int win, hits, readahead;

	page = find_get_page(swap_address_space(entry), swp_offset(entry));

	INC_CACHE_INFO(find_total);
	if (page) {
		INC_CACHE_INFO(find_success);
		if (unlikely(PageTransCompound(page)))
			return page;
		readahead = TestClearPageReadahead(page);
		if (vma) {
			ra_info = GET_SWAP_RA_VAL(vma);
			win = SWAP_RA_WIN(ra_info);
			hits = SWAP_RA_HITS(ra_info);
			if (readahead)
				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
			atomic_long_set(&vma->swap_readahead_info,
					SWAP_RA_VAL(addr, win, hits));
		}
		if (readahead) {
			count_vm_event(SWAP_RA_HIT);
			if (!vma)
				atomic_inc(&swapin_readahead_hits);
		}
	}
	return page;
}

struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr,
			bool *new_page_allocated)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = swap_address_space(entry);
	int err;
	*new_page_allocated = false;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
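		 * (lookup_swap_cache() bumps find_total/find_success on
		 * every call, so we use find_get_page() directly here).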
		 */
		found_page = find_get_page(swapper_space, swp_offset(entry));
		if (found_page)
			break;

		/*
		 * Just skip read ahead for unused swap slot.
		 * During swap_off when swap_slot_cache is disabled,
		 * we have to handle the race between putting
		 * swap entry in swap cache and marking swap slot
		 * as SWAP_HAS_CACHE.  That's done in later part of code or
		 * else swap_off will be aborted if we return NULL.
		 */
		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_maybe_preload(gfp_mask & GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) {
			radix_tree_preload_end();
			/*
			 * We might race against get_swap_page() and stumble
			 * across a SWAP_HAS_CACHE swap_map entry whose page
			 * has not been brought into the swapcache yet.
			 */
			cond_resched();
			continue;
		}
		if (err) {		/* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__SetPageLocked(new_page);
		__SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_anon(new_page);
			*new_page_allocated = true;
			return new_page;
		}
		radix_tree_preload_end();
		__ClearPageLocked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
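		 * put_swap_page() below drops that reference before the
		 * loop retries (or gives up on -ENOMEM).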
		 */
		put_swap_page(new_page, entry);
	} while (err != -ENOMEM);

	if (new_page)
		put_page(new_page);
	return found_page;
}

/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
{
	bool page_was_allocated;
	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
			vma, addr, &page_was_allocated);

	if (page_was_allocated)
		swap_readpage(retpage, do_poll);

	return retpage;
}

static unsigned int __swapin_nr_pages(unsigned long prev_offset,
				      unsigned long offset,
				      int hits,
				      int max_pages,
				      int prev_win)
{
	unsigned int pages, last_ra;

	/*
	 * This heuristic has been found to work well on both sequential and
	 * random loads, swapping to hard disk or to SSD: please don't ask
	 * what the "+ 2" means, it just happens to work well, that's all.
	 */
	pages = hits + 2;
	if (pages == 2) {
		/*
		 * We can have no readahead hits to judge by: but must not get
		 * stuck here forever, so check for an adjacent offset instead
		 * (and don't even bother to check whether swap type is same).
		 */
		if (offset != prev_offset + 1 && offset != prev_offset - 1)
			pages = 1;
	} else {
		unsigned int roundup = 4;
		while (roundup < pages)
			roundup <<= 1;
		pages = roundup;
	}

	if (pages > max_pages)
		pages = max_pages;

	/* Don't shrink readahead too fast */
	last_ra = prev_win / 2;
	if (pages < last_ra)
		pages = last_ra;

	return pages;
}

static unsigned long swapin_nr_pages(unsigned long offset)
{
	static unsigned long prev_offset;
	unsigned int hits, pages, max_pages;
	static atomic_t last_readahead_pages;

	max_pages = 1 << READ_ONCE(page_cluster);
	if (max_pages <= 1)
		return 1;

	hits = atomic_xchg(&swapin_readahead_hits, 0);
	pages = __swapin_nr_pages(prev_offset, offset, hits, max_pages,
				  atomic_read(&last_readahead_pages));
	if (!hits)
		prev_offset = offset;
	atomic_set(&last_readahead_pages, pages);

	return pages;
}

/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vma: user vma this address belongs to
 * @addr: target address for mempolicy
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
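 * (i.e. hold the mmap_sem for read, so the vma and its mempolicy stay
 * stable while readahead pages are allocated against it).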
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	unsigned long entry_offset = swp_offset(entry);
	unsigned long offset = entry_offset;
	unsigned long start_offset, end_offset;
	unsigned long mask;
	struct blk_plug plug;
	bool do_poll = true, page_allocated;

	mask = swapin_nr_pages(offset) - 1;
	if (!mask)
		goto skip;

	do_poll = false;
	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;

	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset; offset++) {
		/* Ok, do the async read-ahead now */
		page = __read_swap_cache_async(
			swp_entry(swp_type(entry), offset),
			gfp_mask, vma, addr, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (offset != entry_offset &&
			    likely(!PageTransCompound(page))) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);

	lru_add_drain();	/* Push any new pages onto the LRU now */
skip:
	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}

int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
	struct address_space *spaces, *space;
	unsigned int i, nr;

	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
	spaces = kvzalloc(sizeof(struct address_space) * nr, GFP_KERNEL);
	if (!spaces)
		return -ENOMEM;
	for (i = 0; i < nr; i++) {
		space = spaces + i;
		INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
		atomic_set(&space->i_mmap_writable, 0);
		space->a_ops = &swap_aops;
		/* swap cache doesn't use writeback related tags */
		mapping_set_no_writeback_tags(space);
		spin_lock_init(&space->tree_lock);
	}
	nr_swapper_spaces[type] = nr;
	rcu_assign_pointer(swapper_spaces[type], spaces);

	return 0;
}

void exit_swap_address_space(unsigned int type)
{
	struct address_space *spaces;

	spaces = swapper_spaces[type];
	nr_swapper_spaces[type] = 0;
	rcu_assign_pointer(swapper_spaces[type], NULL);
	synchronize_rcu();
	kvfree(spaces);
}

static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
				     unsigned long faddr,
				     unsigned long lpfn,
				     unsigned long rpfn,
				     unsigned long *start,
				     unsigned long *end)
{
	*start = max3(lpfn, PFN_DOWN(vma->vm_start),
		      PFN_DOWN(faddr & PMD_MASK));
	*end = min3(rpfn, PFN_DOWN(vma->vm_end),
		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
}

struct page *swap_readahead_detect(struct vm_fault *vmf,
				   struct vma_swap_readahead *swap_ra)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long swap_ra_info;
	struct page *page;
	swp_entry_t entry;
	unsigned long faddr, pfn, fpfn;
	unsigned long start, end;
	pte_t *pte;
	unsigned int max_win, hits, prev_win, win, left;
#ifndef CONFIG_64BIT
	pte_t *tpte;
#endif

	faddr = vmf->address;
	entry = pte_to_swp_entry(vmf->orig_pte);
	if (unlikely(non_swap_entry(entry)))
		return NULL;
	page = lookup_swap_cache(entry, vma, faddr);
	if (page)
		return page;

	max_win = 1 << READ_ONCE(swap_ra_max_order);
	if (max_win == 1) {
		swap_ra->win = 1;
		return NULL;
	}

	fpfn = PFN_DOWN(faddr);
	swap_ra_info = GET_SWAP_RA_VAL(vma);
	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
	prev_win = SWAP_RA_WIN(swap_ra_info);
	hits = SWAP_RA_HITS(swap_ra_info);
	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
					       max_win, prev_win);
	atomic_long_set(&vma->swap_readahead_info,
			SWAP_RA_VAL(faddr, win, 0));

	if (win == 1)
		return NULL;

	/* Copy the PTEs because the page table may be unmapped */
	if (fpfn == pfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
	else if (pfn == fpfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
				  &start, &end);
	else {
		left = (win - 1) / 2;
		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
				  &start, &end);
	}
	swap_ra->nr_pte = end - start;
	swap_ra->offset = fpfn - start;
	pte = vmf->pte - swap_ra->offset;
#ifdef CONFIG_64BIT
	swap_ra->ptes = pte;
#else
	tpte = swap_ra->ptes;
	for (pfn = start; pfn != end; pfn++)
		*tpte++ = *pte++;
#endif

	return NULL;
}

struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
				    struct vm_fault *vmf,
				    struct vma_swap_readahead *swap_ra)
{
	struct blk_plug plug;
	struct vm_area_struct *vma = vmf->vma;
	struct page *page;
	pte_t *pte, pentry;
	swp_entry_t entry;
	unsigned int i;
	bool page_allocated;

	if (swap_ra->win == 1)
		goto skip;

	blk_start_plug(&plug);
	for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
	     i++, pte++) {
		pentry = *pte;
		if (pte_none(pentry))
			continue;
		if (pte_present(pentry))
			continue;
		entry = pte_to_swp_entry(pentry);
		if (unlikely(non_swap_entry(entry)))
			continue;
		page = __read_swap_cache_async(entry, gfp_mask, vma,
					       vmf->address, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (i != swap_ra->offset &&
			    likely(!PageTransCompound(page))) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);
	lru_add_drain();
skip:
	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
				     swap_ra->win == 1);
}

#ifdef CONFIG_SYSFS
static ssize_t vma_ra_enabled_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", swap_vma_readahead ? "true" : "false");
}
static ssize_t vma_ra_enabled_store(struct kobject *kobj,
				      struct kobj_attribute *attr,
				      const char *buf, size_t count)
{
	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
		swap_vma_readahead = true;
	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
		swap_vma_readahead = false;
	else
		return -EINVAL;

	return count;
}
static struct kobj_attribute vma_ra_enabled_attr =
	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
	       vma_ra_enabled_store);

static ssize_t vma_ra_max_order_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", swap_ra_max_order);
}
static ssize_t vma_ra_max_order_store(struct kobject *kobj,
				      struct kobj_attribute *attr,
				      const char *buf, size_t count)
{
	int err, v;

	err = kstrtoint(buf, 10, &v);
	if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
		return -EINVAL;

	swap_ra_max_order = v;

	return count;
}
static struct kobj_attribute vma_ra_max_order_attr =
	__ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
	       vma_ra_max_order_store);

static struct attribute *swap_attrs[] = {
	&vma_ra_enabled_attr.attr,
	&vma_ra_max_order_attr.attr,
	NULL,
};

static struct attribute_group swap_attr_group = {
	.attrs = swap_attrs,
};

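/*
 * Expose the vma_ra_enabled and vma_ra_max_order knobs above under
 * /sys/kernel/mm/swap/ (mm_kobj is the /sys/kernel/mm kobject).
 */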
static int __init swap_init_sysfs(void)
{
	int err;
	struct kobject *swap_kobj;

	swap_kobj = kobject_create_and_add("swap", mm_kobj);
	if (!swap_kobj) {
		pr_err("failed to create swap kobject\n");
		return -ENOMEM;
	}
	err = sysfs_create_group(swap_kobj, &swap_attr_group);
	if (err) {
		pr_err("failed to register swap group\n");
		goto delete_obj;
	}
	return 0;

delete_obj:
	kobject_put(swap_kobj);
	return err;
}
subsys_initcall(swap_init_sysfs);
#endif