/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/mempool.h>
#include <linux/zpool.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
static u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
static atomic_t zswap_stored_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/*********************************
* tunables
**********************************/

/* Enable/disable zswap (disabled by default) */
static bool zswap_enabled;
module_param_named(enabled, zswap_enabled, bool, 0644);

/* Compressor to be used by zswap (fixed at boot for now) */
#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
module_param_named(compressor, zswap_compressor, charp, 0444);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent,
			zswap_max_pool_percent, uint, 0644);

/* Compressed storage to use */
#define ZSWAP_ZPOOL_DEFAULT "zbud"
static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
module_param_named(zpool, zswap_zpool_type, charp, 0444);

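/*
 * Usage sketch for the parameters above: zswap can be enabled at boot
 * with "zswap.enabled=1" on the kernel command line, optionally with
 * "zswap.compressor=" and "zswap.zpool=".  The 0644 parameters are also
 * writable at runtime, e.g.:
 *
 *	echo 1  > /sys/module/zswap/parameters/enabled
 *	echo 25 > /sys/module/zswap/parameters/max_pool_percent
 *
 * "compressor" and "zpool" (mode 0444) can only be set at boot.
 */
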
/* The zpool is shared by the entire zswap backend */
static struct zpool *zswap_pool;

/*********************************
* compression functions
**********************************/
/* per-cpu compression transforms */
static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;

enum comp_op {
	ZSWAP_COMPOP_COMPRESS,
	ZSWAP_COMPOP_DECOMPRESS
};

static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
				u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret;

	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
	switch (op) {
	case ZSWAP_COMPOP_COMPRESS:
		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
		break;
	case ZSWAP_COMPOP_DECOMPRESS:
		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
		break;
	default:
		ret = -EINVAL;
	}

	put_cpu();
	return ret;
}

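/*
 * Illustrative call (a sketch mirroring the store path below): the
 * caller supplies the destination buffer and its capacity in *dlen,
 * and on success *dlen is updated to the actual output length.
 *
 *	unsigned int dlen = PAGE_SIZE;
 *	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE,
 *			dst, &dlen);
 *
 * get_cpu()/put_cpu() above pin the caller to one CPU for the duration
 * of the operation, so the per-cpu transform is never shared.
 */
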
static int __init zswap_comp_init(void)
{
	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
		pr_info("%s compressor not available\n", zswap_compressor);
		/* fall back to default compressor */
		zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
		if (!crypto_has_comp(zswap_compressor, 0, 0))
			/* can't even load the default compressor */
			return -ENODEV;
	}
	pr_info("using %s compressor\n", zswap_compressor);

	/* alloc percpu transforms */
	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
	if (!zswap_comp_pcpu_tfms)
		return -ENOMEM;
	return 0;
}

static void __init zswap_comp_exit(void)
{
	/* free percpu transforms */
	free_percpu(zswap_comp_pcpu_tfms);
}

/*********************************
* data structures
**********************************/
/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into the red-black tree for the appropriate
 *          swap type
 * refcount - the number of outstanding references to the entry.  This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * handle - zpool allocation handle that stores the compressed page data
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	unsigned long handle;
};

struct zswap_header {
	swp_entry_t swpentry;
};

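/*
 * Each zpool allocation made by zswap_frontswap_store() below places a
 * struct zswap_header immediately ahead of the compressed data:
 *
 *	+--------------+---------------------------------+
 *	| zswap_header | compressed data (entry->length) |
 *	+--------------+---------------------------------+
 *
 * The embedded swp_entry_t is what lets zswap_writeback_entry() recover
 * the swap type and offset from nothing but a zpool handle when the
 * pool evicts an allocation.
 */
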
/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static int __init zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	return zswap_entry_cache == NULL;
}

static void __init zswap_entry_cache_destroy(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
	}
}

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	zpool_free(zswap_pool, entry->handle);
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_pool_total_size = zpool_get_total_size(zswap_pool);
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/*
 * Caller must hold the tree lock.
 * Removes the entry from the tree and frees it, if nobody references it.
 */
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	BUG_ON(refcount < 0);
	if (refcount == 0) {
		zswap_rb_erase(&tree->rbroot, entry);
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry = NULL;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}

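/*
 * Typical lookup pattern (a sketch of what the load and writeback paths
 * below do): take a reference under the tree lock, use the entry with
 * the lock dropped, then put the reference under the lock again.
 *
 *	spin_lock(&tree->lock);
 *	entry = zswap_entry_find_get(&tree->rbroot, offset);
 *	spin_unlock(&tree->lock);
 *	if (!entry)
 *		return -1;
 *	... use entry->handle without the lock held ...
 *	spin_lock(&tree->lock);
 *	zswap_entry_put(tree, entry);
 *	spin_unlock(&tree->lock);
 *
 * The held reference keeps a concurrent invalidate from freeing the
 * entry while it is in use outside the lock.
 */
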
/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);

static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
{
	struct crypto_comp *tfm;
	u8 *dst;

	switch (action) {
	case CPU_UP_PREPARE:
		tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
		if (IS_ERR(tfm)) {
			pr_err("can't allocate compressor transform\n");
			return NOTIFY_BAD;
		}
		*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
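		/*
		 * The destination buffer is two pages, presumably to leave
		 * headroom for worst-case compressor output, which can
		 * exceed PAGE_SIZE for incompressible data.
		 */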
		dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
		if (!dst) {
			pr_err("can't allocate compressor buffer\n");
			crypto_free_comp(tfm);
			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
			return NOTIFY_BAD;
		}
		per_cpu(zswap_dstmem, cpu) = dst;
		break;
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
		if (tfm) {
			crypto_free_comp(tfm);
			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
		}
		dst = per_cpu(zswap_dstmem, cpu);
		kfree(dst);
		per_cpu(zswap_dstmem, cpu) = NULL;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static int zswap_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	unsigned long cpu = (unsigned long)pcpu;
	return __zswap_cpu_notifier(action, cpu);
}

static struct notifier_block zswap_cpu_notifier_block = {
	.notifier_call = zswap_cpu_notifier
};

static int __init zswap_cpu_init(void)
{
	unsigned long cpu;

	cpu_notifier_register_begin();
	for_each_online_cpu(cpu)
		if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
			goto cleanup;
	__register_cpu_notifier(&zswap_cpu_notifier_block);
	cpu_notifier_register_done();
	return 0;

cleanup:
	for_each_online_cpu(cpu)
		__zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
	cpu_notifier_register_done();
	return -ENOMEM;
}

/*********************************
* helpers
**********************************/
static bool zswap_is_full(void)
{
	return totalram_pages * zswap_max_pool_percent / 100 <
		DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

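/*
 * Worked example: with totalram_pages at 1048576 (4GiB of 4KiB pages)
 * and the default max_pool_percent of 20, the pool is considered full
 * once its compressed storage exceeds 1048576 * 20 / 100 = 209715
 * pages, i.e. roughly 819MiB of pool space.
 */
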
/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_FAIL,
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage.
 * Returns ZSWAP_SWAPCACHE_EXIST if the page was already in the swap cache;
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     in which case the new page is added to the swap cache and locked;
 * Returns ZSWAP_SWAPCACHE_FAIL on error.
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = swap_address_space(entry);
	int err;

	*retpage = NULL;
	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page(GFP_KERNEL);
			if (!new_page)
				break; /* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			*retpage = new_page;
			return ZSWAP_SWAPCACHE_NEW;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	if (!found_page)
		return ZSWAP_SWAPCACHE_FAIL;
	*retpage = found_page;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	u8 *src, *dst;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* extract swpentry from data */
	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	swpentry = zhdr->swpentry; /* here */
	zpool_unmap_handle(pool, handle);
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		page_cache_release(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		dlen = PAGE_SIZE;
		src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
				ZPOOL_MM_RO) + sizeof(struct zswap_header);
		dst = kmap_atomic(page);
		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
				entry->length, dst, &dlen);
		kunmap_atomic(dst);
		zpool_unmap_handle(zswap_pool, entry->handle);
		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	page_cache_release(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for the entry here:
	 * (1) refcount is 1 (normal case): the entry is valid and on the tree
	 * (2) refcount is 0: the entry was freed and removed from the tree
	 *     because an invalidate happened during writeback
	 * Search the tree; if the entry is still there, drop the tree's
	 * reference so the entry is freed.
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	goto end;

	/*
	 * If we get here due to ZSWAP_SWAPCACHE_EXIST, a load may be
	 * happening concurrently, so it is safe and okay not to free the
	 * entry.  It is also okay to return !0 even if the put below ends
	 * up freeing the entry.
	 */
fail:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

end:
	return ret;
}

/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	int ret;
	unsigned int dlen = PAGE_SIZE, len;
	unsigned long handle;
	char *buf;
	u8 *src, *dst;
	struct zswap_header *zhdr;

	if (!zswap_enabled || !tree) {
		ret = -ENODEV;
		goto reject;
	}

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		if (zpool_shrink(zswap_pool, 1, NULL)) {
			zswap_reject_reclaim_fail++;
			ret = -ENOMEM;
			goto reject;
		}
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	/* compress */
	dst = get_cpu_var(zswap_dstmem);
	src = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
	kunmap_atomic(src);
	if (ret) {
		ret = -EINVAL;
		goto freepage;
	}

	/* store */
	len = dlen + sizeof(struct zswap_header);
	ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
		&handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto freepage;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto freepage;
	}
	zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
	zhdr->swpentry = swp_entry(type, offset);
	buf = (u8 *)(zhdr + 1);
	memcpy(buf, dst, dlen);
	zpool_unmap_handle(zswap_pool, handle);
	put_cpu_var(zswap_dstmem);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_pool_total_size = zpool_get_total_size(zswap_pool);

	return 0;

freepage:
	put_cpu_var(zswap_dstmem);
	zswap_entry_cache_free(entry);
reject:
	return ret;
}

/*
 * Returns 0 if the page was successfully decompressed.
 * Returns -1 if the entry was not found or on error.
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	u8 *src, *dst;
	unsigned int dlen;
	int ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	/* decompress */
	dlen = PAGE_SIZE;
	src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
			ZPOOL_MM_RO) + sizeof(struct zswap_header);
	dst = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
		dst, &dlen);
	kunmap_atomic(dst);
	zpool_unmap_handle(zswap_pool, entry->handle);
	BUG_ON(ret);

	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return 0;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	zswap_rb_erase(&tree->rbroot, entry);

	/* drop the initial reference from entry creation */
	zswap_entry_put(tree, entry);

	spin_unlock(&tree->lock);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

static struct zpool_ops zswap_zpool_ops = {
	.evict = zswap_writeback_entry
};

static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
	if (!zswap_debugfs_root)
		return -ENOMEM;

	debugfs_create_u64("pool_limit_hit", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", S_IRUGO,
			zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_stored_pages);

	return 0;
}

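/*
 * With debugfs mounted (typically at /sys/kernel/debug), the counters
 * created above can be read from userspace, e.g.:
 *
 *	grep -H . /sys/kernel/debug/zswap/*
 *
 * The values are best-effort; see the note above the statistics block
 * at the top of this file.
 */
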
static void __exit zswap_debugfs_exit(void)
{
	debugfs_remove_recursive(zswap_debugfs_root);
}
#else
static int __init zswap_debugfs_init(void)
{
	return 0;
}

static void __exit zswap_debugfs_exit(void) { }
#endif

/*********************************
* module init and exit
**********************************/
static int __init init_zswap(void)
{
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;

	pr_info("loading zswap\n");

	zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
					&zswap_zpool_ops);
	if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
		pr_info("%s zpool not available\n", zswap_zpool_type);
		zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
		zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
					&zswap_zpool_ops);
	}
	if (!zswap_pool) {
		pr_err("%s zpool not available\n", zswap_zpool_type);
		pr_err("zpool creation failed\n");
		goto error;
	}
	pr_info("using %s pool\n", zswap_zpool_type);

	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto cachefail;
	}
	if (zswap_comp_init()) {
		pr_err("compressor initialization failed\n");
		goto compfail;
	}
	if (zswap_cpu_init()) {
		pr_err("per-cpu initialization failed\n");
		goto pcpufail;
	}

	frontswap_register_ops(&zswap_frontswap_ops);
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;
pcpufail:
	zswap_comp_exit();
compfail:
	zswap_entry_cache_destroy();
cachefail:
	zpool_destroy_pool(zswap_pool);
error:
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Compressed cache for swap pages");