12b281117SSeth Jennings /* 22b281117SSeth Jennings * zswap.c - zswap driver file 32b281117SSeth Jennings * 42b281117SSeth Jennings * zswap is a backend for frontswap that takes pages that are in the process 52b281117SSeth Jennings * of being swapped out and attempts to compress and store them in a 62b281117SSeth Jennings * RAM-based memory pool. This can result in a significant I/O reduction on 72b281117SSeth Jennings * the swap device and, in the case where decompressing from RAM is faster 82b281117SSeth Jennings * than reading from the swap device, can also improve workload performance. 92b281117SSeth Jennings * 102b281117SSeth Jennings * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> 112b281117SSeth Jennings * 122b281117SSeth Jennings * This program is free software; you can redistribute it and/or 132b281117SSeth Jennings * modify it under the terms of the GNU General Public License 142b281117SSeth Jennings * as published by the Free Software Foundation; either version 2 152b281117SSeth Jennings * of the License, or (at your option) any later version. 162b281117SSeth Jennings * 172b281117SSeth Jennings * This program is distributed in the hope that it will be useful, 182b281117SSeth Jennings * but WITHOUT ANY WARRANTY; without even the implied warranty of 192b281117SSeth Jennings * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 202b281117SSeth Jennings * GNU General Public License for more details. 
212b281117SSeth Jennings */ 222b281117SSeth Jennings 232b281117SSeth Jennings #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 242b281117SSeth Jennings 252b281117SSeth Jennings #include <linux/module.h> 262b281117SSeth Jennings #include <linux/cpu.h> 272b281117SSeth Jennings #include <linux/highmem.h> 282b281117SSeth Jennings #include <linux/slab.h> 292b281117SSeth Jennings #include <linux/spinlock.h> 302b281117SSeth Jennings #include <linux/types.h> 312b281117SSeth Jennings #include <linux/atomic.h> 322b281117SSeth Jennings #include <linux/frontswap.h> 332b281117SSeth Jennings #include <linux/rbtree.h> 342b281117SSeth Jennings #include <linux/swap.h> 352b281117SSeth Jennings #include <linux/crypto.h> 362b281117SSeth Jennings #include <linux/mempool.h> 3712d79d64SDan Streetman #include <linux/zpool.h> 382b281117SSeth Jennings 392b281117SSeth Jennings #include <linux/mm_types.h> 402b281117SSeth Jennings #include <linux/page-flags.h> 412b281117SSeth Jennings #include <linux/swapops.h> 422b281117SSeth Jennings #include <linux/writeback.h> 432b281117SSeth Jennings #include <linux/pagemap.h> 442b281117SSeth Jennings 452b281117SSeth Jennings /********************************* 462b281117SSeth Jennings * statistics 472b281117SSeth Jennings **********************************/ 4812d79d64SDan Streetman /* Total bytes used by the compressed storage */ 4912d79d64SDan Streetman static u64 zswap_pool_total_size; 502b281117SSeth Jennings /* The number of compressed pages currently stored in zswap */ 512b281117SSeth Jennings static atomic_t zswap_stored_pages = ATOMIC_INIT(0); 522b281117SSeth Jennings 532b281117SSeth Jennings /* 542b281117SSeth Jennings * The statistics below are not protected from concurrent access for 552b281117SSeth Jennings * performance reasons so they may not be a 100% accurate. However, 562b281117SSeth Jennings * they do provide useful information on roughly how many times a 572b281117SSeth Jennings * certain event is occurring. 
582b281117SSeth Jennings */ 592b281117SSeth Jennings 602b281117SSeth Jennings /* Pool limit was hit (see zswap_max_pool_percent) */ 612b281117SSeth Jennings static u64 zswap_pool_limit_hit; 622b281117SSeth Jennings /* Pages written back when pool limit was reached */ 632b281117SSeth Jennings static u64 zswap_written_back_pages; 642b281117SSeth Jennings /* Store failed due to a reclaim failure after pool limit was reached */ 652b281117SSeth Jennings static u64 zswap_reject_reclaim_fail; 662b281117SSeth Jennings /* Compressed page was too big for the allocator to (optimally) store */ 672b281117SSeth Jennings static u64 zswap_reject_compress_poor; 682b281117SSeth Jennings /* Store failed because underlying allocator could not get memory */ 692b281117SSeth Jennings static u64 zswap_reject_alloc_fail; 702b281117SSeth Jennings /* Store failed because the entry metadata could not be allocated (rare) */ 712b281117SSeth Jennings static u64 zswap_reject_kmemcache_fail; 722b281117SSeth Jennings /* Duplicate store was encountered (rare) */ 732b281117SSeth Jennings static u64 zswap_duplicate_entry; 742b281117SSeth Jennings 752b281117SSeth Jennings /********************************* 762b281117SSeth Jennings * tunables 772b281117SSeth Jennings **********************************/ 78*c00ed16aSDan Streetman 79*c00ed16aSDan Streetman /* Enable/disable zswap (disabled by default) */ 80*c00ed16aSDan Streetman static bool zswap_enabled; 81*c00ed16aSDan Streetman module_param_named(enabled, zswap_enabled, bool, 0644); 822b281117SSeth Jennings 832b281117SSeth Jennings /* Compressor to be used by zswap (fixed at boot for now) */ 842b281117SSeth Jennings #define ZSWAP_COMPRESSOR_DEFAULT "lzo" 852b281117SSeth Jennings static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; 8612ab028bSDan Streetman module_param_named(compressor, zswap_compressor, charp, 0444); 872b281117SSeth Jennings 882b281117SSeth Jennings /* The maximum percentage of memory that the compressed pool can occupy */ 
892b281117SSeth Jennings static unsigned int zswap_max_pool_percent = 20; 902b281117SSeth Jennings module_param_named(max_pool_percent, 912b281117SSeth Jennings zswap_max_pool_percent, uint, 0644); 922b281117SSeth Jennings 9312d79d64SDan Streetman /* Compressed storage to use */ 9412d79d64SDan Streetman #define ZSWAP_ZPOOL_DEFAULT "zbud" 9512d79d64SDan Streetman static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; 9612d79d64SDan Streetman module_param_named(zpool, zswap_zpool_type, charp, 0444); 9712d79d64SDan Streetman 9812d79d64SDan Streetman /* zpool is shared by all of zswap backend */ 9912d79d64SDan Streetman static struct zpool *zswap_pool; 10060105e12SMinchan Kim 1012b281117SSeth Jennings /********************************* 1022b281117SSeth Jennings * compression functions 1032b281117SSeth Jennings **********************************/ 1042b281117SSeth Jennings /* per-cpu compression transforms */ 1052b281117SSeth Jennings static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms; 1062b281117SSeth Jennings 1072b281117SSeth Jennings enum comp_op { 1082b281117SSeth Jennings ZSWAP_COMPOP_COMPRESS, 1092b281117SSeth Jennings ZSWAP_COMPOP_DECOMPRESS 1102b281117SSeth Jennings }; 1112b281117SSeth Jennings 1122b281117SSeth Jennings static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen, 1132b281117SSeth Jennings u8 *dst, unsigned int *dlen) 1142b281117SSeth Jennings { 1152b281117SSeth Jennings struct crypto_comp *tfm; 1162b281117SSeth Jennings int ret; 1172b281117SSeth Jennings 1182b281117SSeth Jennings tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu()); 1192b281117SSeth Jennings switch (op) { 1202b281117SSeth Jennings case ZSWAP_COMPOP_COMPRESS: 1212b281117SSeth Jennings ret = crypto_comp_compress(tfm, src, slen, dst, dlen); 1222b281117SSeth Jennings break; 1232b281117SSeth Jennings case ZSWAP_COMPOP_DECOMPRESS: 1242b281117SSeth Jennings ret = crypto_comp_decompress(tfm, src, slen, dst, dlen); 1252b281117SSeth Jennings break; 1262b281117SSeth 
Jennings default: 1272b281117SSeth Jennings ret = -EINVAL; 1282b281117SSeth Jennings } 1292b281117SSeth Jennings 1302b281117SSeth Jennings put_cpu(); 1312b281117SSeth Jennings return ret; 1322b281117SSeth Jennings } 1332b281117SSeth Jennings 1342b281117SSeth Jennings static int __init zswap_comp_init(void) 1352b281117SSeth Jennings { 1362b281117SSeth Jennings if (!crypto_has_comp(zswap_compressor, 0, 0)) { 1372b281117SSeth Jennings pr_info("%s compressor not available\n", zswap_compressor); 1382b281117SSeth Jennings /* fall back to default compressor */ 1392b281117SSeth Jennings zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; 1402b281117SSeth Jennings if (!crypto_has_comp(zswap_compressor, 0, 0)) 1412b281117SSeth Jennings /* can't even load the default compressor */ 1422b281117SSeth Jennings return -ENODEV; 1432b281117SSeth Jennings } 1442b281117SSeth Jennings pr_info("using %s compressor\n", zswap_compressor); 1452b281117SSeth Jennings 1462b281117SSeth Jennings /* alloc percpu transforms */ 1472b281117SSeth Jennings zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *); 1482b281117SSeth Jennings if (!zswap_comp_pcpu_tfms) 1492b281117SSeth Jennings return -ENOMEM; 1502b281117SSeth Jennings return 0; 1512b281117SSeth Jennings } 1522b281117SSeth Jennings 153dd01d7d8SMahendran Ganesh static void __init zswap_comp_exit(void) 1542b281117SSeth Jennings { 1552b281117SSeth Jennings /* free percpu transforms */ 1562b281117SSeth Jennings free_percpu(zswap_comp_pcpu_tfms); 1572b281117SSeth Jennings } 1582b281117SSeth Jennings 1592b281117SSeth Jennings /********************************* 1602b281117SSeth Jennings * data structures 1612b281117SSeth Jennings **********************************/ 1622b281117SSeth Jennings /* 1632b281117SSeth Jennings * struct zswap_entry 1642b281117SSeth Jennings * 1652b281117SSeth Jennings * This structure contains the metadata for tracking a single compressed 1662b281117SSeth Jennings * page within zswap. 
1672b281117SSeth Jennings * 1682b281117SSeth Jennings * rbnode - links the entry into red-black tree for the appropriate swap type 1692b281117SSeth Jennings * refcount - the number of outstanding reference to the entry. This is needed 1702b281117SSeth Jennings * to protect against premature freeing of the entry by code 1716b452516SSeongJae Park * concurrent calls to load, invalidate, and writeback. The lock 1722b281117SSeth Jennings * for the zswap_tree structure that contains the entry must 1732b281117SSeth Jennings * be held while changing the refcount. Since the lock must 1742b281117SSeth Jennings * be held, there is no reason to also make refcount atomic. 1752b281117SSeth Jennings * offset - the swap offset for the entry. Index into the red-black tree. 17612d79d64SDan Streetman * handle - zpool allocation handle that stores the compressed page data 1772b281117SSeth Jennings * length - the length in bytes of the compressed page data. Needed during 1782b281117SSeth Jennings * decompression 1792b281117SSeth Jennings */ 1802b281117SSeth Jennings struct zswap_entry { 1812b281117SSeth Jennings struct rb_node rbnode; 1822b281117SSeth Jennings pgoff_t offset; 1832b281117SSeth Jennings int refcount; 1842b281117SSeth Jennings unsigned int length; 1852b281117SSeth Jennings unsigned long handle; 1862b281117SSeth Jennings }; 1872b281117SSeth Jennings 1882b281117SSeth Jennings struct zswap_header { 1892b281117SSeth Jennings swp_entry_t swpentry; 1902b281117SSeth Jennings }; 1912b281117SSeth Jennings 1922b281117SSeth Jennings /* 1932b281117SSeth Jennings * The tree lock in the zswap_tree struct protects a few things: 1942b281117SSeth Jennings * - the rbtree 1952b281117SSeth Jennings * - the refcount field of each entry in the tree 1962b281117SSeth Jennings */ 1972b281117SSeth Jennings struct zswap_tree { 1982b281117SSeth Jennings struct rb_root rbroot; 1992b281117SSeth Jennings spinlock_t lock; 2002b281117SSeth Jennings }; 2012b281117SSeth Jennings 2022b281117SSeth Jennings 
static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; 2032b281117SSeth Jennings 2042b281117SSeth Jennings /********************************* 2052b281117SSeth Jennings * zswap entry functions 2062b281117SSeth Jennings **********************************/ 2072b281117SSeth Jennings static struct kmem_cache *zswap_entry_cache; 2082b281117SSeth Jennings 209dd01d7d8SMahendran Ganesh static int __init zswap_entry_cache_create(void) 2102b281117SSeth Jennings { 2112b281117SSeth Jennings zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); 2125d2d42deSSeongJae Park return zswap_entry_cache == NULL; 2132b281117SSeth Jennings } 2142b281117SSeth Jennings 215c119239bSFabian Frederick static void __init zswap_entry_cache_destroy(void) 2162b281117SSeth Jennings { 2172b281117SSeth Jennings kmem_cache_destroy(zswap_entry_cache); 2182b281117SSeth Jennings } 2192b281117SSeth Jennings 2202b281117SSeth Jennings static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp) 2212b281117SSeth Jennings { 2222b281117SSeth Jennings struct zswap_entry *entry; 2232b281117SSeth Jennings entry = kmem_cache_alloc(zswap_entry_cache, gfp); 2242b281117SSeth Jennings if (!entry) 2252b281117SSeth Jennings return NULL; 2262b281117SSeth Jennings entry->refcount = 1; 2270ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 2282b281117SSeth Jennings return entry; 2292b281117SSeth Jennings } 2302b281117SSeth Jennings 2312b281117SSeth Jennings static void zswap_entry_cache_free(struct zswap_entry *entry) 2322b281117SSeth Jennings { 2332b281117SSeth Jennings kmem_cache_free(zswap_entry_cache, entry); 2342b281117SSeth Jennings } 2352b281117SSeth Jennings 2362b281117SSeth Jennings /********************************* 2372b281117SSeth Jennings * rbtree functions 2382b281117SSeth Jennings **********************************/ 2392b281117SSeth Jennings static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset) 2402b281117SSeth Jennings { 2412b281117SSeth Jennings struct rb_node *node = root->rb_node; 
2422b281117SSeth Jennings struct zswap_entry *entry; 2432b281117SSeth Jennings 2442b281117SSeth Jennings while (node) { 2452b281117SSeth Jennings entry = rb_entry(node, struct zswap_entry, rbnode); 2462b281117SSeth Jennings if (entry->offset > offset) 2472b281117SSeth Jennings node = node->rb_left; 2482b281117SSeth Jennings else if (entry->offset < offset) 2492b281117SSeth Jennings node = node->rb_right; 2502b281117SSeth Jennings else 2512b281117SSeth Jennings return entry; 2522b281117SSeth Jennings } 2532b281117SSeth Jennings return NULL; 2542b281117SSeth Jennings } 2552b281117SSeth Jennings 2562b281117SSeth Jennings /* 2572b281117SSeth Jennings * In the case that a entry with the same offset is found, a pointer to 2582b281117SSeth Jennings * the existing entry is stored in dupentry and the function returns -EEXIST 2592b281117SSeth Jennings */ 2602b281117SSeth Jennings static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry, 2612b281117SSeth Jennings struct zswap_entry **dupentry) 2622b281117SSeth Jennings { 2632b281117SSeth Jennings struct rb_node **link = &root->rb_node, *parent = NULL; 2642b281117SSeth Jennings struct zswap_entry *myentry; 2652b281117SSeth Jennings 2662b281117SSeth Jennings while (*link) { 2672b281117SSeth Jennings parent = *link; 2682b281117SSeth Jennings myentry = rb_entry(parent, struct zswap_entry, rbnode); 2692b281117SSeth Jennings if (myentry->offset > entry->offset) 2702b281117SSeth Jennings link = &(*link)->rb_left; 2712b281117SSeth Jennings else if (myentry->offset < entry->offset) 2722b281117SSeth Jennings link = &(*link)->rb_right; 2732b281117SSeth Jennings else { 2742b281117SSeth Jennings *dupentry = myentry; 2752b281117SSeth Jennings return -EEXIST; 2762b281117SSeth Jennings } 2772b281117SSeth Jennings } 2782b281117SSeth Jennings rb_link_node(&entry->rbnode, parent, link); 2792b281117SSeth Jennings rb_insert_color(&entry->rbnode, root); 2802b281117SSeth Jennings return 0; 2812b281117SSeth Jennings } 
2822b281117SSeth Jennings 2830ab0abcfSWeijie Yang static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) 2840ab0abcfSWeijie Yang { 2850ab0abcfSWeijie Yang if (!RB_EMPTY_NODE(&entry->rbnode)) { 2860ab0abcfSWeijie Yang rb_erase(&entry->rbnode, root); 2870ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 2880ab0abcfSWeijie Yang } 2890ab0abcfSWeijie Yang } 2900ab0abcfSWeijie Yang 2910ab0abcfSWeijie Yang /* 29212d79d64SDan Streetman * Carries out the common pattern of freeing and entry's zpool allocation, 2930ab0abcfSWeijie Yang * freeing the entry itself, and decrementing the number of stored pages. 2940ab0abcfSWeijie Yang */ 29560105e12SMinchan Kim static void zswap_free_entry(struct zswap_entry *entry) 2960ab0abcfSWeijie Yang { 29712d79d64SDan Streetman zpool_free(zswap_pool, entry->handle); 2980ab0abcfSWeijie Yang zswap_entry_cache_free(entry); 2990ab0abcfSWeijie Yang atomic_dec(&zswap_stored_pages); 30012d79d64SDan Streetman zswap_pool_total_size = zpool_get_total_size(zswap_pool); 3010ab0abcfSWeijie Yang } 3020ab0abcfSWeijie Yang 3030ab0abcfSWeijie Yang /* caller must hold the tree lock */ 3040ab0abcfSWeijie Yang static void zswap_entry_get(struct zswap_entry *entry) 3050ab0abcfSWeijie Yang { 3060ab0abcfSWeijie Yang entry->refcount++; 3070ab0abcfSWeijie Yang } 3080ab0abcfSWeijie Yang 3090ab0abcfSWeijie Yang /* caller must hold the tree lock 3100ab0abcfSWeijie Yang * remove from the tree and free it, if nobody reference the entry 3110ab0abcfSWeijie Yang */ 3120ab0abcfSWeijie Yang static void zswap_entry_put(struct zswap_tree *tree, 3130ab0abcfSWeijie Yang struct zswap_entry *entry) 3140ab0abcfSWeijie Yang { 3150ab0abcfSWeijie Yang int refcount = --entry->refcount; 3160ab0abcfSWeijie Yang 3170ab0abcfSWeijie Yang BUG_ON(refcount < 0); 3180ab0abcfSWeijie Yang if (refcount == 0) { 3190ab0abcfSWeijie Yang zswap_rb_erase(&tree->rbroot, entry); 32060105e12SMinchan Kim zswap_free_entry(entry); 3210ab0abcfSWeijie Yang } 3220ab0abcfSWeijie Yang } 
3230ab0abcfSWeijie Yang 3240ab0abcfSWeijie Yang /* caller must hold the tree lock */ 3250ab0abcfSWeijie Yang static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, 3260ab0abcfSWeijie Yang pgoff_t offset) 3270ab0abcfSWeijie Yang { 3280ab0abcfSWeijie Yang struct zswap_entry *entry = NULL; 3290ab0abcfSWeijie Yang 3300ab0abcfSWeijie Yang entry = zswap_rb_search(root, offset); 3310ab0abcfSWeijie Yang if (entry) 3320ab0abcfSWeijie Yang zswap_entry_get(entry); 3330ab0abcfSWeijie Yang 3340ab0abcfSWeijie Yang return entry; 3350ab0abcfSWeijie Yang } 3360ab0abcfSWeijie Yang 3372b281117SSeth Jennings /********************************* 3382b281117SSeth Jennings * per-cpu code 3392b281117SSeth Jennings **********************************/ 3402b281117SSeth Jennings static DEFINE_PER_CPU(u8 *, zswap_dstmem); 3412b281117SSeth Jennings 3422b281117SSeth Jennings static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu) 3432b281117SSeth Jennings { 3442b281117SSeth Jennings struct crypto_comp *tfm; 3452b281117SSeth Jennings u8 *dst; 3462b281117SSeth Jennings 3472b281117SSeth Jennings switch (action) { 3482b281117SSeth Jennings case CPU_UP_PREPARE: 3492b281117SSeth Jennings tfm = crypto_alloc_comp(zswap_compressor, 0, 0); 3502b281117SSeth Jennings if (IS_ERR(tfm)) { 3512b281117SSeth Jennings pr_err("can't allocate compressor transform\n"); 3522b281117SSeth Jennings return NOTIFY_BAD; 3532b281117SSeth Jennings } 3542b281117SSeth Jennings *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm; 35572d09633SEric Dumazet dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); 3562b281117SSeth Jennings if (!dst) { 3572b281117SSeth Jennings pr_err("can't allocate compressor buffer\n"); 3582b281117SSeth Jennings crypto_free_comp(tfm); 3592b281117SSeth Jennings *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL; 3602b281117SSeth Jennings return NOTIFY_BAD; 3612b281117SSeth Jennings } 3622b281117SSeth Jennings per_cpu(zswap_dstmem, cpu) = dst; 3632b281117SSeth Jennings 
break; 3642b281117SSeth Jennings case CPU_DEAD: 3652b281117SSeth Jennings case CPU_UP_CANCELED: 3662b281117SSeth Jennings tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu); 3672b281117SSeth Jennings if (tfm) { 3682b281117SSeth Jennings crypto_free_comp(tfm); 3692b281117SSeth Jennings *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL; 3702b281117SSeth Jennings } 3712b281117SSeth Jennings dst = per_cpu(zswap_dstmem, cpu); 3722b281117SSeth Jennings kfree(dst); 3732b281117SSeth Jennings per_cpu(zswap_dstmem, cpu) = NULL; 3742b281117SSeth Jennings break; 3752b281117SSeth Jennings default: 3762b281117SSeth Jennings break; 3772b281117SSeth Jennings } 3782b281117SSeth Jennings return NOTIFY_OK; 3792b281117SSeth Jennings } 3802b281117SSeth Jennings 3812b281117SSeth Jennings static int zswap_cpu_notifier(struct notifier_block *nb, 3822b281117SSeth Jennings unsigned long action, void *pcpu) 3832b281117SSeth Jennings { 3842b281117SSeth Jennings unsigned long cpu = (unsigned long)pcpu; 3852b281117SSeth Jennings return __zswap_cpu_notifier(action, cpu); 3862b281117SSeth Jennings } 3872b281117SSeth Jennings 3882b281117SSeth Jennings static struct notifier_block zswap_cpu_notifier_block = { 3892b281117SSeth Jennings .notifier_call = zswap_cpu_notifier 3902b281117SSeth Jennings }; 3912b281117SSeth Jennings 392dd01d7d8SMahendran Ganesh static int __init zswap_cpu_init(void) 3932b281117SSeth Jennings { 3942b281117SSeth Jennings unsigned long cpu; 3952b281117SSeth Jennings 39657637824SSrivatsa S. Bhat cpu_notifier_register_begin(); 3972b281117SSeth Jennings for_each_online_cpu(cpu) 3982b281117SSeth Jennings if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK) 3992b281117SSeth Jennings goto cleanup; 40057637824SSrivatsa S. Bhat __register_cpu_notifier(&zswap_cpu_notifier_block); 40157637824SSrivatsa S. 
Bhat cpu_notifier_register_done(); 4022b281117SSeth Jennings return 0; 4032b281117SSeth Jennings 4042b281117SSeth Jennings cleanup: 4052b281117SSeth Jennings for_each_online_cpu(cpu) 4062b281117SSeth Jennings __zswap_cpu_notifier(CPU_UP_CANCELED, cpu); 40757637824SSrivatsa S. Bhat cpu_notifier_register_done(); 4082b281117SSeth Jennings return -ENOMEM; 4092b281117SSeth Jennings } 4102b281117SSeth Jennings 4112b281117SSeth Jennings /********************************* 4122b281117SSeth Jennings * helpers 4132b281117SSeth Jennings **********************************/ 4142b281117SSeth Jennings static bool zswap_is_full(void) 4152b281117SSeth Jennings { 4165d2d42deSSeongJae Park return totalram_pages * zswap_max_pool_percent / 100 < 41712d79d64SDan Streetman DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); 4182b281117SSeth Jennings } 4192b281117SSeth Jennings 4202b281117SSeth Jennings /********************************* 4212b281117SSeth Jennings * writeback code 4222b281117SSeth Jennings **********************************/ 4232b281117SSeth Jennings /* return enum for zswap_get_swap_cache_page */ 4242b281117SSeth Jennings enum zswap_get_swap_ret { 4252b281117SSeth Jennings ZSWAP_SWAPCACHE_NEW, 4262b281117SSeth Jennings ZSWAP_SWAPCACHE_EXIST, 42767d13fe8SWeijie Yang ZSWAP_SWAPCACHE_FAIL, 4282b281117SSeth Jennings }; 4292b281117SSeth Jennings 4302b281117SSeth Jennings /* 4312b281117SSeth Jennings * zswap_get_swap_cache_page 4322b281117SSeth Jennings * 4332b281117SSeth Jennings * This is an adaption of read_swap_cache_async() 4342b281117SSeth Jennings * 4352b281117SSeth Jennings * This function tries to find a page with the given swap entry 4362b281117SSeth Jennings * in the swapper_space address space (the swap cache). If the page 4372b281117SSeth Jennings * is found, it is returned in retpage. Otherwise, a page is allocated, 4382b281117SSeth Jennings * added to the swap cache, and returned in retpage. 
4392b281117SSeth Jennings * 4402b281117SSeth Jennings * If success, the swap cache page is returned in retpage 44167d13fe8SWeijie Yang * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache 44267d13fe8SWeijie Yang * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated, 44367d13fe8SWeijie Yang * the new page is added to swapcache and locked 44467d13fe8SWeijie Yang * Returns ZSWAP_SWAPCACHE_FAIL on error 4452b281117SSeth Jennings */ 4462b281117SSeth Jennings static int zswap_get_swap_cache_page(swp_entry_t entry, 4472b281117SSeth Jennings struct page **retpage) 4482b281117SSeth Jennings { 4492b281117SSeth Jennings struct page *found_page, *new_page = NULL; 450822518dcSSunghan Suh struct address_space *swapper_space = swap_address_space(entry); 4512b281117SSeth Jennings int err; 4522b281117SSeth Jennings 4532b281117SSeth Jennings *retpage = NULL; 4542b281117SSeth Jennings do { 4552b281117SSeth Jennings /* 4562b281117SSeth Jennings * First check the swap cache. Since this is normally 4572b281117SSeth Jennings * called after lookup_swap_cache() failed, re-calling 4582b281117SSeth Jennings * that would confuse statistics. 4592b281117SSeth Jennings */ 4602b281117SSeth Jennings found_page = find_get_page(swapper_space, entry.val); 4612b281117SSeth Jennings if (found_page) 4622b281117SSeth Jennings break; 4632b281117SSeth Jennings 4642b281117SSeth Jennings /* 4652b281117SSeth Jennings * Get a new page to read into from swap. 4662b281117SSeth Jennings */ 4672b281117SSeth Jennings if (!new_page) { 4682b281117SSeth Jennings new_page = alloc_page(GFP_KERNEL); 4692b281117SSeth Jennings if (!new_page) 4702b281117SSeth Jennings break; /* Out of memory */ 4712b281117SSeth Jennings } 4722b281117SSeth Jennings 4732b281117SSeth Jennings /* 4742b281117SSeth Jennings * call radix_tree_preload() while we can wait. 
4752b281117SSeth Jennings */ 4762b281117SSeth Jennings err = radix_tree_preload(GFP_KERNEL); 4772b281117SSeth Jennings if (err) 4782b281117SSeth Jennings break; 4792b281117SSeth Jennings 4802b281117SSeth Jennings /* 4812b281117SSeth Jennings * Swap entry may have been freed since our caller observed it. 4822b281117SSeth Jennings */ 4832b281117SSeth Jennings err = swapcache_prepare(entry); 4842b281117SSeth Jennings if (err == -EEXIST) { /* seems racy */ 4852b281117SSeth Jennings radix_tree_preload_end(); 4862b281117SSeth Jennings continue; 4872b281117SSeth Jennings } 4882b281117SSeth Jennings if (err) { /* swp entry is obsolete ? */ 4892b281117SSeth Jennings radix_tree_preload_end(); 4902b281117SSeth Jennings break; 4912b281117SSeth Jennings } 4922b281117SSeth Jennings 4932b281117SSeth Jennings /* May fail (-ENOMEM) if radix-tree node allocation failed. */ 4942b281117SSeth Jennings __set_page_locked(new_page); 4952b281117SSeth Jennings SetPageSwapBacked(new_page); 4962b281117SSeth Jennings err = __add_to_swap_cache(new_page, entry); 4972b281117SSeth Jennings if (likely(!err)) { 4982b281117SSeth Jennings radix_tree_preload_end(); 4992b281117SSeth Jennings lru_cache_add_anon(new_page); 5002b281117SSeth Jennings *retpage = new_page; 5012b281117SSeth Jennings return ZSWAP_SWAPCACHE_NEW; 5022b281117SSeth Jennings } 5032b281117SSeth Jennings radix_tree_preload_end(); 5042b281117SSeth Jennings ClearPageSwapBacked(new_page); 5052b281117SSeth Jennings __clear_page_locked(new_page); 5062b281117SSeth Jennings /* 5072b281117SSeth Jennings * add_to_swap_cache() doesn't return -EEXIST, so we can safely 5082b281117SSeth Jennings * clear SWAP_HAS_CACHE flag. 
5092b281117SSeth Jennings */ 5100a31bc97SJohannes Weiner swapcache_free(entry); 5112b281117SSeth Jennings } while (err != -ENOMEM); 5122b281117SSeth Jennings 5132b281117SSeth Jennings if (new_page) 5142b281117SSeth Jennings page_cache_release(new_page); 5152b281117SSeth Jennings if (!found_page) 51667d13fe8SWeijie Yang return ZSWAP_SWAPCACHE_FAIL; 5172b281117SSeth Jennings *retpage = found_page; 5182b281117SSeth Jennings return ZSWAP_SWAPCACHE_EXIST; 5192b281117SSeth Jennings } 5202b281117SSeth Jennings 5212b281117SSeth Jennings /* 5222b281117SSeth Jennings * Attempts to free an entry by adding a page to the swap cache, 5232b281117SSeth Jennings * decompressing the entry data into the page, and issuing a 5242b281117SSeth Jennings * bio write to write the page back to the swap device. 5252b281117SSeth Jennings * 5262b281117SSeth Jennings * This can be thought of as a "resumed writeback" of the page 5272b281117SSeth Jennings * to the swap device. We are basically resuming the same swap 5282b281117SSeth Jennings * writeback path that was intercepted with the frontswap_store() 5292b281117SSeth Jennings * in the first place. After the page has been decompressed into 5302b281117SSeth Jennings * the swap cache, the compressed version stored by zswap can be 5312b281117SSeth Jennings * freed. 
5322b281117SSeth Jennings */ 53312d79d64SDan Streetman static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) 5342b281117SSeth Jennings { 5352b281117SSeth Jennings struct zswap_header *zhdr; 5362b281117SSeth Jennings swp_entry_t swpentry; 5372b281117SSeth Jennings struct zswap_tree *tree; 5382b281117SSeth Jennings pgoff_t offset; 5392b281117SSeth Jennings struct zswap_entry *entry; 5402b281117SSeth Jennings struct page *page; 5412b281117SSeth Jennings u8 *src, *dst; 5422b281117SSeth Jennings unsigned int dlen; 5430ab0abcfSWeijie Yang int ret; 5442b281117SSeth Jennings struct writeback_control wbc = { 5452b281117SSeth Jennings .sync_mode = WB_SYNC_NONE, 5462b281117SSeth Jennings }; 5472b281117SSeth Jennings 5482b281117SSeth Jennings /* extract swpentry from data */ 54912d79d64SDan Streetman zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); 5502b281117SSeth Jennings swpentry = zhdr->swpentry; /* here */ 55112d79d64SDan Streetman zpool_unmap_handle(pool, handle); 5522b281117SSeth Jennings tree = zswap_trees[swp_type(swpentry)]; 5532b281117SSeth Jennings offset = swp_offset(swpentry); 5542b281117SSeth Jennings 5552b281117SSeth Jennings /* find and ref zswap entry */ 5562b281117SSeth Jennings spin_lock(&tree->lock); 5570ab0abcfSWeijie Yang entry = zswap_entry_find_get(&tree->rbroot, offset); 5582b281117SSeth Jennings if (!entry) { 5592b281117SSeth Jennings /* entry was invalidated */ 5602b281117SSeth Jennings spin_unlock(&tree->lock); 5612b281117SSeth Jennings return 0; 5622b281117SSeth Jennings } 5632b281117SSeth Jennings spin_unlock(&tree->lock); 5642b281117SSeth Jennings BUG_ON(offset != entry->offset); 5652b281117SSeth Jennings 5662b281117SSeth Jennings /* try to allocate swap cache page */ 5672b281117SSeth Jennings switch (zswap_get_swap_cache_page(swpentry, &page)) { 56867d13fe8SWeijie Yang case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */ 5692b281117SSeth Jennings ret = -ENOMEM; 5702b281117SSeth Jennings goto fail; 
5712b281117SSeth Jennings 57267d13fe8SWeijie Yang case ZSWAP_SWAPCACHE_EXIST: 5732b281117SSeth Jennings /* page is already in the swap cache, ignore for now */ 5742b281117SSeth Jennings page_cache_release(page); 5752b281117SSeth Jennings ret = -EEXIST; 5762b281117SSeth Jennings goto fail; 5772b281117SSeth Jennings 5782b281117SSeth Jennings case ZSWAP_SWAPCACHE_NEW: /* page is locked */ 5792b281117SSeth Jennings /* decompress */ 5802b281117SSeth Jennings dlen = PAGE_SIZE; 58112d79d64SDan Streetman src = (u8 *)zpool_map_handle(zswap_pool, entry->handle, 58212d79d64SDan Streetman ZPOOL_MM_RO) + sizeof(struct zswap_header); 5832b281117SSeth Jennings dst = kmap_atomic(page); 5842b281117SSeth Jennings ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, 5852b281117SSeth Jennings entry->length, dst, &dlen); 5862b281117SSeth Jennings kunmap_atomic(dst); 58712d79d64SDan Streetman zpool_unmap_handle(zswap_pool, entry->handle); 5882b281117SSeth Jennings BUG_ON(ret); 5892b281117SSeth Jennings BUG_ON(dlen != PAGE_SIZE); 5902b281117SSeth Jennings 5912b281117SSeth Jennings /* page is up to date */ 5922b281117SSeth Jennings SetPageUptodate(page); 5932b281117SSeth Jennings } 5942b281117SSeth Jennings 595b349acc7SWeijie Yang /* move it to the tail of the inactive list after end_writeback */ 596b349acc7SWeijie Yang SetPageReclaim(page); 597b349acc7SWeijie Yang 5982b281117SSeth Jennings /* start writeback */ 5992b281117SSeth Jennings __swap_writepage(page, &wbc, end_swap_bio_write); 6002b281117SSeth Jennings page_cache_release(page); 6012b281117SSeth Jennings zswap_written_back_pages++; 6022b281117SSeth Jennings 6032b281117SSeth Jennings spin_lock(&tree->lock); 6042b281117SSeth Jennings /* drop local reference */ 6050ab0abcfSWeijie Yang zswap_entry_put(tree, entry); 6062b281117SSeth Jennings 6072b281117SSeth Jennings /* 6080ab0abcfSWeijie Yang * There are two possible situations for entry here: 6090ab0abcfSWeijie Yang * (1) refcount is 1(normal case), entry is valid and on the tree 
6100ab0abcfSWeijie Yang * (2) refcount is 0, entry is freed and not on the tree 6110ab0abcfSWeijie Yang * because invalidate happened during writeback 6120ab0abcfSWeijie Yang * search the tree and free the entry if find entry 6132b281117SSeth Jennings */ 6140ab0abcfSWeijie Yang if (entry == zswap_rb_search(&tree->rbroot, offset)) 6150ab0abcfSWeijie Yang zswap_entry_put(tree, entry); 6162b281117SSeth Jennings spin_unlock(&tree->lock); 6172b281117SSeth Jennings 6180ab0abcfSWeijie Yang goto end; 6190ab0abcfSWeijie Yang 6200ab0abcfSWeijie Yang /* 6210ab0abcfSWeijie Yang * if we get here due to ZSWAP_SWAPCACHE_EXIST 6220ab0abcfSWeijie Yang * a load may happening concurrently 6230ab0abcfSWeijie Yang * it is safe and okay to not free the entry 6240ab0abcfSWeijie Yang * if we free the entry in the following put 6250ab0abcfSWeijie Yang * it it either okay to return !0 6260ab0abcfSWeijie Yang */ 6272b281117SSeth Jennings fail: 6282b281117SSeth Jennings spin_lock(&tree->lock); 6290ab0abcfSWeijie Yang zswap_entry_put(tree, entry); 6302b281117SSeth Jennings spin_unlock(&tree->lock); 6310ab0abcfSWeijie Yang 6320ab0abcfSWeijie Yang end: 6332b281117SSeth Jennings return ret; 6342b281117SSeth Jennings } 6352b281117SSeth Jennings 6362b281117SSeth Jennings /********************************* 6372b281117SSeth Jennings * frontswap hooks 6382b281117SSeth Jennings **********************************/ 6392b281117SSeth Jennings /* attempts to compress and store an single page */ 6402b281117SSeth Jennings static int zswap_frontswap_store(unsigned type, pgoff_t offset, 6412b281117SSeth Jennings struct page *page) 6422b281117SSeth Jennings { 6432b281117SSeth Jennings struct zswap_tree *tree = zswap_trees[type]; 6442b281117SSeth Jennings struct zswap_entry *entry, *dupentry; 6452b281117SSeth Jennings int ret; 6462b281117SSeth Jennings unsigned int dlen = PAGE_SIZE, len; 6472b281117SSeth Jennings unsigned long handle; 6482b281117SSeth Jennings char *buf; 6492b281117SSeth Jennings u8 *src, *dst; 
6502b281117SSeth Jennings struct zswap_header *zhdr; 6512b281117SSeth Jennings 652*c00ed16aSDan Streetman if (!zswap_enabled || !tree) { 6532b281117SSeth Jennings ret = -ENODEV; 6542b281117SSeth Jennings goto reject; 6552b281117SSeth Jennings } 6562b281117SSeth Jennings 6572b281117SSeth Jennings /* reclaim space if needed */ 6582b281117SSeth Jennings if (zswap_is_full()) { 6592b281117SSeth Jennings zswap_pool_limit_hit++; 66012d79d64SDan Streetman if (zpool_shrink(zswap_pool, 1, NULL)) { 6612b281117SSeth Jennings zswap_reject_reclaim_fail++; 6622b281117SSeth Jennings ret = -ENOMEM; 6632b281117SSeth Jennings goto reject; 6642b281117SSeth Jennings } 6652b281117SSeth Jennings } 6662b281117SSeth Jennings 6672b281117SSeth Jennings /* allocate entry */ 6682b281117SSeth Jennings entry = zswap_entry_cache_alloc(GFP_KERNEL); 6692b281117SSeth Jennings if (!entry) { 6702b281117SSeth Jennings zswap_reject_kmemcache_fail++; 6712b281117SSeth Jennings ret = -ENOMEM; 6722b281117SSeth Jennings goto reject; 6732b281117SSeth Jennings } 6742b281117SSeth Jennings 6752b281117SSeth Jennings /* compress */ 6762b281117SSeth Jennings dst = get_cpu_var(zswap_dstmem); 6772b281117SSeth Jennings src = kmap_atomic(page); 6782b281117SSeth Jennings ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen); 6792b281117SSeth Jennings kunmap_atomic(src); 6802b281117SSeth Jennings if (ret) { 6812b281117SSeth Jennings ret = -EINVAL; 6822b281117SSeth Jennings goto freepage; 6832b281117SSeth Jennings } 6842b281117SSeth Jennings 6852b281117SSeth Jennings /* store */ 6862b281117SSeth Jennings len = dlen + sizeof(struct zswap_header); 68712d79d64SDan Streetman ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN, 6882b281117SSeth Jennings &handle); 6892b281117SSeth Jennings if (ret == -ENOSPC) { 6902b281117SSeth Jennings zswap_reject_compress_poor++; 6912b281117SSeth Jennings goto freepage; 6922b281117SSeth Jennings } 6932b281117SSeth Jennings if (ret) { 6942b281117SSeth Jennings 
zswap_reject_alloc_fail++; 6952b281117SSeth Jennings goto freepage; 6962b281117SSeth Jennings } 69712d79d64SDan Streetman zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW); 6982b281117SSeth Jennings zhdr->swpentry = swp_entry(type, offset); 6992b281117SSeth Jennings buf = (u8 *)(zhdr + 1); 7002b281117SSeth Jennings memcpy(buf, dst, dlen); 70112d79d64SDan Streetman zpool_unmap_handle(zswap_pool, handle); 7022b281117SSeth Jennings put_cpu_var(zswap_dstmem); 7032b281117SSeth Jennings 7042b281117SSeth Jennings /* populate entry */ 7052b281117SSeth Jennings entry->offset = offset; 7062b281117SSeth Jennings entry->handle = handle; 7072b281117SSeth Jennings entry->length = dlen; 7082b281117SSeth Jennings 7092b281117SSeth Jennings /* map */ 7102b281117SSeth Jennings spin_lock(&tree->lock); 7112b281117SSeth Jennings do { 7122b281117SSeth Jennings ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry); 7132b281117SSeth Jennings if (ret == -EEXIST) { 7142b281117SSeth Jennings zswap_duplicate_entry++; 7152b281117SSeth Jennings /* remove from rbtree */ 7160ab0abcfSWeijie Yang zswap_rb_erase(&tree->rbroot, dupentry); 7170ab0abcfSWeijie Yang zswap_entry_put(tree, dupentry); 7182b281117SSeth Jennings } 7192b281117SSeth Jennings } while (ret == -EEXIST); 7202b281117SSeth Jennings spin_unlock(&tree->lock); 7212b281117SSeth Jennings 7222b281117SSeth Jennings /* update stats */ 7232b281117SSeth Jennings atomic_inc(&zswap_stored_pages); 72412d79d64SDan Streetman zswap_pool_total_size = zpool_get_total_size(zswap_pool); 7252b281117SSeth Jennings 7262b281117SSeth Jennings return 0; 7272b281117SSeth Jennings 7282b281117SSeth Jennings freepage: 7292b281117SSeth Jennings put_cpu_var(zswap_dstmem); 7302b281117SSeth Jennings zswap_entry_cache_free(entry); 7312b281117SSeth Jennings reject: 7322b281117SSeth Jennings return ret; 7332b281117SSeth Jennings } 7342b281117SSeth Jennings 7352b281117SSeth Jennings /* 7362b281117SSeth Jennings * returns 0 if the page was successfully decompressed 
 * return -1 on entry not found or error
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	u8 *src, *dst;
	unsigned int dlen;
	int ret;

	/* find; takes a reference so concurrent writeback can't free it */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	/* decompress directly into the target page */
	dlen = PAGE_SIZE;
	src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
			ZPOOL_MM_RO) + sizeof(struct zswap_header);
	dst = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
		dst, &dlen);
	kunmap_atomic(dst);
	zpool_unmap_handle(zswap_pool, entry->handle);
	/* decompressing data we compressed ourselves must not fail */
	BUG_ON(ret);

	/* drop the reference taken by zswap_entry_find_get() above */
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return 0;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	zswap_rb_erase(&tree->rbroot, entry);

	/*
	 * drop the initial reference from entry creation; the entry is
	 * actually freed here only if no load/writeback still holds a ref
	 */
	zswap_entry_put(tree, entry);

	spin_unlock(&tree->lock);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/*
	 * walk the tree and free everything; postorder traversal is safe
	 * against freeing the node currently being visited
	 */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

static struct zpool_ops
zswap_zpool_ops = {
	/* invoked by the zpool when it wants to evict a stored object */
	.evict = zswap_writeback_entry
};

/*
 * Allocates and installs the per-swap-type red-black tree.  On allocation
 * failure zswap_trees[type] stays NULL and zswap_frontswap_store() will
 * reject pages for this type with -ENODEV.
 */
static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

/* exposes the zswap statistics counters under <debugfs>/zswap/ */
static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
	if (!zswap_debugfs_root)
		return -ENOMEM;

	debugfs_create_u64("pool_limit_hit", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", S_IRUGO,
			zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_stored_pages);

	return 0;
}

static void __exit zswap_debugfs_exit(void)
{
	debugfs_remove_recursive(zswap_debugfs_root);
}
#else
/* CONFIG_DEBUG_FS disabled: no-op stubs */
static int __init zswap_debugfs_init(void)
{
	return 0;
}

static void
__exit zswap_debugfs_exit(void) { }
#endif

/*********************************
* module init and exit
**********************************/
/*
 * Module initialization: create the compressed-memory pool (falling back
 * to the default zpool implementation if the configured one is missing),
 * then set up the entry cache, the compressor, and the per-cpu state, and
 * finally register with frontswap.  Resources are unwound in reverse
 * order of acquisition on failure.
 */
static int __init init_zswap(void)
{
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;

	pr_info("loading zswap\n");

	zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
					&zswap_zpool_ops);
	if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
		/* configured type unavailable; retry with the default */
		pr_info("%s zpool not available\n", zswap_zpool_type);
		zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
		zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
					&zswap_zpool_ops);
	}
	if (!zswap_pool) {
		pr_err("%s zpool not available\n", zswap_zpool_type);
		pr_err("zpool creation failed\n");
		goto error;
	}
	pr_info("using %s pool\n", zswap_zpool_type);

	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto cachefail;
	}
	if (zswap_comp_init()) {
		pr_err("compressor initialization failed\n");
		goto compfail;
	}
	if (zswap_cpu_init()) {
		pr_err("per-cpu initialization failed\n");
		goto pcpufail;
	}

	/* from here on frontswap may call our hooks at any time */
	frontswap_register_ops(&zswap_frontswap_ops);
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;
pcpufail:
	zswap_comp_exit();
compfail:
	zswap_entry_cache_destroy();
cachefail:
	zpool_destroy_pool(zswap_pool);
error:
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Compressed cache for swap pages");