xref: /linux/mm/zswap.c (revision 506a86c5e2217cfb1884ea4806dc74c82d058733)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a cache that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempolicy.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>
#include <linux/zswap.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>
#include <linux/list_lru.h>

#include "swap.h"
#include "internal.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
*/

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Store failed due to compression algorithm failure */
static u64 zswap_reject_compress_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;
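
/*
 * Note: the event counters above are exported read-only through debugfs
 * (under /sys/kernel/debug/zswap/ when CONFIG_DEBUG_FS is enabled) by
 * zswap_debugfs_init(), defined later in this file.
 */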

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET ""

static int zswap_setup(void);

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set =		zswap_enabled_param_set,
	.get =		param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set =		zswap_compressor_param_set,
	.get =		param_get_charp,
	.free =		param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set =		zswap_zpool_param_set,
	.get =		param_get_charp,
	.free =		param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
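
/*
 * Example: all three parameters above can be inspected and changed at
 * runtime through sysfs, e.g.:
 *
 *   echo lz4 > /sys/module/zswap/parameters/compressor
 *   echo zsmalloc > /sys/module/zswap/parameters/zpool
 *   echo 1 > /sys/module/zswap/parameters/enabled
 *
 * Changing the compressor or zpool creates a new pool and makes it
 * current (see __zswap_param_set() below); the old pool lingers until
 * its entries are faulted back in, written back, or invalidated.
 */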

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

static bool zswap_exclusive_loads_enabled = IS_ENABLED(
		CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);

/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32

/* Enable/disable memory pressure-based shrinker. */
static bool zswap_shrinker_enabled = IS_ENABLED(
		CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);

bool is_zswap_enabled(void)
{
	return zswap_enabled;
}

/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *buffer;
	struct mutex mutex;
};

/*
 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
 * The only case where lru_lock is not acquired while holding tree.lock
 * is when a zswap_entry is taken off the lru for writeback; in that
 * case it must be verified that the entry is still valid in the tree.
 */
struct zswap_pool {
	struct zpool *zpools[ZSWAP_NR_ZPOOLS];
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
	struct list_lru list_lru;
	struct mem_cgroup *next_shrink;
	struct shrinker *shrinker;
	atomic_t nr_stored;
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into the red-black tree for the appropriate
 *          swap type
 * swpentry - associated swap entry, the offset indexes into the red-black tree
 * refcount - the number of outstanding references to the entry. This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression. For a same-value filled page, length is 0, and
 *          both pool and lru are invalid and must be ignored.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - the fill value of a same-value filled page
 * objcg - the obj_cgroup that the compressed memory is charged to
 * lru - handle to the pool's lru used to evict pages.
 */
struct zswap_entry {
	struct rb_node rbnode;
	swp_entry_t swpentry;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
	struct list_head lru;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
static unsigned int nr_zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

enum zswap_init_type {
	ZSWAP_UNINIT,
	ZSWAP_INIT_SUCCEED,
	ZSWAP_INIT_FAILED
};

static enum zswap_init_type zswap_init_state;

/* used to ensure the integrity of initialization */
static DEFINE_MUTEX(zswap_init_lock);

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
{
	return &zswap_trees[swp_type(swp)][swp_offset(swp)
		>> SWAP_ADDRESS_SPACE_SHIFT];
}
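
/*
 * Example: SWAP_ADDRESS_SPACE_SHIFT is currently 14, so each rb-tree
 * covers 2^14 = 16384 swap slots (64 MiB of swap space with 4 KiB
 * pages). A 1 GiB swap device is thus sharded across 16 trees, which
 * reduces contention on the per-tree spinlock.
 */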

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpools[0]))

static int zswap_writeback_entry(struct zswap_entry *entry,
				 swp_entry_t swpentry);

static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
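
/*
 * Worked example (illustrative): with 16 GiB of RAM (4194304 pages of
 * 4 KiB) and the defaults above, zswap_is_full() triggers once the pool
 * exceeds 20% of RAM (~3.2 GiB), and zswap_can_accept() admits new
 * pages again only after the pool shrinks below 90% of that limit
 * (~2.9 GiB). The gap between the two thresholds provides hysteresis so
 * stores do not flap right at the limit.
 */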

static u64 get_zswap_pool_size(struct zswap_pool *pool)
{
	u64 pool_size = 0;
	int i;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		pool_size += zpool_get_total_size(pool->zpools[i]);

	return pool_size;
}

static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += get_zswap_pool_size(pool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* pool functions
**********************************/

static void zswap_alloc_shrinker(struct zswap_pool *pool);
static void shrink_worker(struct work_struct *w);

static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	int i;
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either is unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
		/* unique name for each pool specifically required by zsmalloc */
		snprintf(name, 38, "zswap%x",
			 atomic_inc_return(&zswap_pools_count));

		pool->zpools[i] = zpool_create_pool(type, name, gfp);
		if (!pool->zpools[i]) {
			pr_err("%s zpool not available\n", type);
			goto error;
		}
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;

	zswap_alloc_shrinker(pool);
	if (!pool->shrinker)
		goto error;

	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
		goto lru_fail;
	shrinker_register(pool->shrinker);
	INIT_WORK(&pool->shrink_work, shrink_worker);
	atomic_set(&pool->nr_stored, 0);

	zswap_pool_debug("created", pool);

	return pool;

lru_fail:
	list_lru_destroy(&pool->list_lru);
	shrinker_free(pool->shrinker);
error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	while (i--)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
	return NULL;
}

static struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

static void zswap_pool_destroy(struct zswap_pool *pool)
{
	int i;

	zswap_pool_debug("destroying", pool);

	shrinker_free(pool->shrinker);
	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	list_lru_destroy(&pool->list_lru);

	spin_lock(&zswap_pools_lock);
	mem_cgroup_iter_break(NULL, pool->next_shrink);
	pool->next_shrink = NULL;
	spin_unlock(&zswap_pools_lock);

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
}

static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static struct zswap_pool *zswap_pool_current(void);

static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		/* all zpools share the same type */
		if (strcmp(zpool_get_type(pool->zpools[0]), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

/*********************************
* param callbacks
**********************************/

static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
{
	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return false;
	return true;
}

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret = 0;
	bool new_pool = false;

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		/* if this is load-time (pre-init) param setting,
		 * don't create a pool; that's done during init.
		 */
		ret = param_set_charp(s, kp);
		break;
	case ZSWAP_INIT_SUCCEED:
		new_pool = zswap_pool_changed(s, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't set param, initialization failed\n");
		ret = -ENODEV;
	}
	mutex_unlock(&zswap_init_lock);

	/* no need to create a new pool, return directly */
	if (!new_pool)
		return ret;

	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock(&zswap_pools_lock);

	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock(&zswap_pools_lock);

	if (!ret) {
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		zswap_pool_put(put_pool);

	return ret;
}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	int ret = -ENODEV;

	/* if this is load-time (pre-init) param setting, only set param. */
	if (system_state != SYSTEM_RUNNING)
		return param_set_bool(val, kp);

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		if (zswap_setup())
			break;
		fallthrough;
	case ZSWAP_INIT_SUCCEED:
		if (!zswap_has_pool)
			pr_err("can't enable, no pool configured\n");
		else
			ret = param_set_bool(val, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't enable, initialization failed\n");
	}
	mutex_unlock(&zswap_init_lock);

	return ret;
}

/*********************************
* lru functions
**********************************/

/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{
	return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
}
#else
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{
	return NULL;
}
#endif

static inline int entry_to_nid(struct zswap_entry *entry)
{
	return page_to_nid(virt_to_page(entry));
}

void zswap_lruvec_state_init(struct lruvec *lruvec)
{
	atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
}

void zswap_folio_swapin(struct folio *folio)
{
	struct lruvec *lruvec;

	if (folio) {
		lruvec = folio_lruvec(folio);
		atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
	}
}

void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
{
	struct zswap_pool *pool;

	/* lock out zswap pools list modification */
	spin_lock(&zswap_pools_lock);
	list_for_each_entry(pool, &zswap_pools, list) {
		if (pool->next_shrink == memcg)
			pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
	}
	spin_unlock(&zswap_pools_lock);
}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* lru functions
**********************************/
static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
{
	atomic_long_t *nr_zswap_protected;
	unsigned long lru_size, old, new;
	int nid = entry_to_nid(entry);
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;

	/*
	 * Note that it is safe to use rcu_read_lock() here, even in the face of
	 * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection
	 * used in list_lru lookup, only two scenarios are possible:
	 *
	 * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The
	 *    new entry will be reparented to memcg's parent's list_lru.
	 * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The
	 *    new entry will be added directly to memcg's parent's list_lru.
	 *
	 * Similar reasoning holds for list_lru_del().
	 */
	rcu_read_lock();
	memcg = mem_cgroup_from_entry(entry);
	/* will always succeed */
	list_lru_add(list_lru, &entry->lru, nid, memcg);

	/* Update the protection area */
	lru_size = list_lru_count_one(list_lru, nid, memcg);
	lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
	nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected;
	old = atomic_long_inc_return(nr_zswap_protected);
	/*
	 * Decay to avoid overflow and adapt to changing workloads.
	 * This is based on LRU reclaim cost decaying heuristics.
	 */
	do {
		new = old > lru_size / 4 ? old / 2 : old;
	} while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new));
	rcu_read_unlock();
}
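
/*
 * Illustrative example of the decay above: with a per-memcg LRU holding
 * 1000 entries, nr_zswap_protected grows by one per store until it
 * exceeds 250 (lru_size / 4), at which point it is halved, keeping the
 * protected count in the vicinity of a quarter of the LRU size.
 */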

static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
{
	int nid = entry_to_nid(entry);
	struct mem_cgroup *memcg;

	rcu_read_lock();
	memcg = mem_cgroup_from_entry(entry);
	/* will always succeed */
	list_lru_del(list_lru, &entry->lru, nid, memcg);
	rcu_read_unlock();
}

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;
	pgoff_t entry_offset;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		entry_offset = swp_offset(entry->swpentry);
		if (entry_offset > offset)
			node = node->rb_left;
		else if (entry_offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;
	pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		myentry_offset = swp_offset(myentry->swpentry);
		if (myentry_offset > entry_offset)
			link = &(*link)->rb_left;
		else if (myentry_offset < entry_offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
		return true;
	}
	return false;
}

static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
{
	int i = 0;

	if (ZSWAP_NR_ZPOOLS > 1)
		i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS));

	return entry->pool->zpools[i];
}
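
/*
 * Entries are spread across the pool's ZSWAP_NR_ZPOOLS (32) zpools by
 * hashing the entry pointer: hash_ptr(entry, ilog2(32)) yields an index
 * in [0, 31]. A given entry always hashes to the same zpool, so its
 * handle can be resolved without storing the index.
 */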

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_entry_free(struct zswap_entry *entry)
{
	if (!entry->length)
		atomic_dec(&zswap_same_filled_pages);
	else {
		zswap_lru_del(&entry->pool->list_lru, entry);
		zpool_free(zswap_find_zpool(entry), entry->handle);
		atomic_dec(&entry->pool->nr_stored);
		zswap_pool_put(entry->pool);
	}
	if (entry->objcg) {
		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
		obj_cgroup_put(entry->objcg);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	WARN_ON_ONCE(!entry->refcount);
	entry->refcount++;
}

/* caller must hold the tree lock */
static void zswap_entry_put(struct zswap_entry *entry)
{
	WARN_ON_ONCE(!entry->refcount);
	if (--entry->refcount == 0) {
		WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
		zswap_entry_free(entry);
	}
}

/*
 * If the entry is still valid in the tree, drop the initial ref and remove it
 * from the tree. This function must be called with an additional ref held,
 * otherwise it may race with another invalidation freeing the entry.
 */
static void zswap_invalidate_entry(struct zswap_tree *tree,
				   struct zswap_entry *entry)
{
	if (zswap_rb_erase(&tree->rbroot, entry))
		zswap_entry_put(entry);
}
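
/*
 * Refcount lifecycle (illustrative): a successful store initializes the
 * entry with refcount == 1 and inserts it into the tree; a concurrent
 * load or writeback takes an extra reference under the tree lock and
 * drops it when done; invalidation erases the entry from the tree and
 * drops the initial reference. The entry is freed only when the last
 * reference is put.
 */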

/*********************************
* shrinker functions
**********************************/
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
				       spinlock_t *lock, void *arg);

static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
		struct shrink_control *sc)
{
	struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
	unsigned long shrink_ret, nr_protected, lru_size;
	struct zswap_pool *pool = shrinker->private_data;
	bool encountered_page_in_swapcache = false;

	if (!zswap_shrinker_enabled ||
			!mem_cgroup_zswap_writeback_enabled(sc->memcg)) {
		sc->nr_scanned = 0;
		return SHRINK_STOP;
	}

	nr_protected =
		atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
	lru_size = list_lru_shrink_count(&pool->list_lru, sc);

	/*
	 * Abort if we are shrinking into the protected region.
	 *
	 * This short-circuiting is necessary because if we have too many
	 * concurrent reclaimers getting the freeable zswap object counts at the
	 * same time (before any of them has made reasonable progress), the total
	 * number of reclaimed objects might be more than the number of unprotected
	 * objects (i.e. the reclaimers will reclaim into the protected area of the
	 * zswap LRU).
	 */
	if (nr_protected >= lru_size - sc->nr_to_scan) {
		sc->nr_scanned = 0;
		return SHRINK_STOP;
	}

	shrink_ret = list_lru_shrink_walk(&pool->list_lru, sc, &shrink_memcg_cb,
		&encountered_page_in_swapcache);

	if (encountered_page_in_swapcache)
		return SHRINK_STOP;

	return shrink_ret ? shrink_ret : SHRINK_STOP;
}

static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
		struct shrink_control *sc)
{
	struct zswap_pool *pool = shrinker->private_data;
	struct mem_cgroup *memcg = sc->memcg;
	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid));
	unsigned long nr_backing, nr_stored, nr_freeable, nr_protected;

	if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
		return 0;

#ifdef CONFIG_MEMCG_KMEM
	mem_cgroup_flush_stats(memcg);
	nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
	nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
#else
	/* use pool stats instead of memcg stats */
	nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT;
	nr_stored = atomic_read(&pool->nr_stored);
#endif

	if (!nr_stored)
		return 0;

	nr_protected =
		atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
	nr_freeable = list_lru_shrink_count(&pool->list_lru, sc);
1073b5ba474fSNhat Pham 	/*
1074b5ba474fSNhat Pham 	 * Subtract from the LRU size an estimate of the number of pages
1075b5ba474fSNhat Pham 	 * that should be protected.
1076b5ba474fSNhat Pham 	 */
1077b5ba474fSNhat Pham 	nr_freeable = nr_freeable > nr_protected ? nr_freeable - nr_protected : 0;
1078b5ba474fSNhat Pham 
1079b5ba474fSNhat Pham 	/*
1080b5ba474fSNhat Pham 	 * Scale the number of freeable pages by the memory saving factor.
1081b5ba474fSNhat Pham 	 * This ensures that the better zswap compresses memory, the fewer
1082b5ba474fSNhat Pham 	 * pages we will evict to swap (as it will otherwise incur IO for
1083b5ba474fSNhat Pham 	 * relatively small memory saving).
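	 *
	 * For example (hypothetical numbers): nr_stored = 900 pages
	 * compressed into nr_backing = 300 backing pages (a 3:1 ratio)
	 * scales nr_freeable = 900 down to mult_frac(900, 300, 900) = 300
	 * reported freeable objects.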
1084b5ba474fSNhat Pham 	 */
1085b5ba474fSNhat Pham 	return mult_frac(nr_freeable, nr_backing, nr_stored);
1086b5ba474fSNhat Pham }
1087b5ba474fSNhat Pham 
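/*
 * Allocate and configure a NUMA- and memcg-aware shrinker for this
 * pool's LRU. The shrinker is registered separately by the pool
 * creation code.
 */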
1088b5ba474fSNhat Pham static void zswap_alloc_shrinker(struct zswap_pool *pool)
1089b5ba474fSNhat Pham {
1090b5ba474fSNhat Pham 	pool->shrinker =
1091b5ba474fSNhat Pham 		shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap");
1092b5ba474fSNhat Pham 	if (!pool->shrinker)
1093b5ba474fSNhat Pham 		return;
1094b5ba474fSNhat Pham 
1095b5ba474fSNhat Pham 	pool->shrinker->private_data = pool;
1096b5ba474fSNhat Pham 	pool->shrinker->scan_objects = zswap_shrinker_scan;
1097b5ba474fSNhat Pham 	pool->shrinker->count_objects = zswap_shrinker_count;
1098b5ba474fSNhat Pham 	pool->shrinker->batch = 0;
1099b5ba474fSNhat Pham 	pool->shrinker->seeks = DEFAULT_SEEKS;
1100b5ba474fSNhat Pham }
1101b5ba474fSNhat Pham 
1102b5ba474fSNhat Pham /*********************************
11032b281117SSeth Jennings * per-cpu code
11042b281117SSeth Jennings **********************************/
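/*
 * CPU hotplug "prepare" callback: set up the per-CPU compression
 * context (mutex, scratch buffer, acomp transform and request) that
 * zswap_compress() and zswap_decompress() use on this CPU.
 */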
1105cab7a7e5SSebastian Andrzej Siewior static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
1106f1c54846SDan Streetman {
1107cab7a7e5SSebastian Andrzej Siewior 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
11081ec3b5feSBarry Song 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
11091ec3b5feSBarry Song 	struct crypto_acomp *acomp;
11101ec3b5feSBarry Song 	struct acomp_req *req;
11118ba2f844SChengming Zhou 	int ret;
11128ba2f844SChengming Zhou 
11138ba2f844SChengming Zhou 	mutex_init(&acomp_ctx->mutex);
11148ba2f844SChengming Zhou 
11158ba2f844SChengming Zhou 	acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
11168ba2f844SChengming Zhou 	if (!acomp_ctx->buffer)
11178ba2f844SChengming Zhou 		return -ENOMEM;
1118f1c54846SDan Streetman 
11191ec3b5feSBarry Song 	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
11201ec3b5feSBarry Song 	if (IS_ERR(acomp)) {
11211ec3b5feSBarry Song 		pr_err("could not alloc crypto acomp %s : %ld\n",
11221ec3b5feSBarry Song 				pool->tfm_name, PTR_ERR(acomp));
11238ba2f844SChengming Zhou 		ret = PTR_ERR(acomp);
11248ba2f844SChengming Zhou 		goto acomp_fail;
11251ec3b5feSBarry Song 	}
11261ec3b5feSBarry Song 	acomp_ctx->acomp = acomp;
1127cab7a7e5SSebastian Andrzej Siewior 
11281ec3b5feSBarry Song 	req = acomp_request_alloc(acomp_ctx->acomp);
11291ec3b5feSBarry Song 	if (!req) {
11301ec3b5feSBarry Song 		pr_err("could not alloc crypto acomp_request %s\n",
11311ec3b5feSBarry Song 		       pool->tfm_name);
11328ba2f844SChengming Zhou 		ret = -ENOMEM;
11338ba2f844SChengming Zhou 		goto req_fail;
1134f1c54846SDan Streetman 	}
11351ec3b5feSBarry Song 	acomp_ctx->req = req;
11361ec3b5feSBarry Song 
11371ec3b5feSBarry Song 	crypto_init_wait(&acomp_ctx->wait);
11381ec3b5feSBarry Song 	/*
11391ec3b5feSBarry Song 	 * If the acomp backend is async (e.g. hardware), crypto_req_done()
11401ec3b5feSBarry Song 	 * will wake up crypto_wait_req(); if it is a synchronous scomp, the
11411ec3b5feSBarry Song 	 * callback is never invoked and crypto_wait_req() returns without blocking.
11421ec3b5feSBarry Song 	 */
11431ec3b5feSBarry Song 	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
11441ec3b5feSBarry Song 				   crypto_req_done, &acomp_ctx->wait);
11451ec3b5feSBarry Song 
1146cab7a7e5SSebastian Andrzej Siewior 	return 0;
11478ba2f844SChengming Zhou 
11488ba2f844SChengming Zhou req_fail:
11498ba2f844SChengming Zhou 	crypto_free_acomp(acomp_ctx->acomp);
11508ba2f844SChengming Zhou acomp_fail:
11518ba2f844SChengming Zhou 	kfree(acomp_ctx->buffer);
11528ba2f844SChengming Zhou 	return ret;
1153cab7a7e5SSebastian Andrzej Siewior }
1154cab7a7e5SSebastian Andrzej Siewior 
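/* CPU hotplug teardown: free the per-CPU compression context. */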
1155cab7a7e5SSebastian Andrzej Siewior static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
1156cab7a7e5SSebastian Andrzej Siewior {
1157cab7a7e5SSebastian Andrzej Siewior 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
11581ec3b5feSBarry Song 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
1159cab7a7e5SSebastian Andrzej Siewior 
11601ec3b5feSBarry Song 	if (!IS_ERR_OR_NULL(acomp_ctx)) {
11611ec3b5feSBarry Song 		if (!IS_ERR_OR_NULL(acomp_ctx->req))
11621ec3b5feSBarry Song 			acomp_request_free(acomp_ctx->req);
11631ec3b5feSBarry Song 		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
11641ec3b5feSBarry Song 			crypto_free_acomp(acomp_ctx->acomp);
11658ba2f844SChengming Zhou 		kfree(acomp_ctx->buffer);
11661ec3b5feSBarry Song 	}
11671ec3b5feSBarry Song 
1168f1c54846SDan Streetman 	return 0;
1169f1c54846SDan Streetman }
1170f1c54846SDan Streetman 
1171a65b0e76SDomenico Cerasuolo static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
1172a65b0e76SDomenico Cerasuolo 				       spinlock_t *lock, void *arg)
1173f999f38bSDomenico Cerasuolo {
1174a65b0e76SDomenico Cerasuolo 	struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
1175b5ba474fSNhat Pham 	bool *encountered_page_in_swapcache = (bool *)arg;
11765878303cSChengming Zhou 	swp_entry_t swpentry;
1177a65b0e76SDomenico Cerasuolo 	enum lru_status ret = LRU_REMOVED_RETRY;
1178a65b0e76SDomenico Cerasuolo 	int writeback_result;
1179f999f38bSDomenico Cerasuolo 
1180f999f38bSDomenico Cerasuolo 	/*
11815878303cSChengming Zhou 	 * Rotate the entry to the tail before unlocking the LRU,
11825878303cSChengming Zhou 	 * so that in case of an invalidation race concurrent
11835878303cSChengming Zhou 	 * reclaimers don't waste their time on it.
11845878303cSChengming Zhou 	 *
11855878303cSChengming Zhou 	 * If writeback succeeds, or failure is due to the entry
11865878303cSChengming Zhou 	 * being invalidated by the swap subsystem, the invalidation
11875878303cSChengming Zhou 	 * will unlink and free it.
11885878303cSChengming Zhou 	 *
11895878303cSChengming Zhou 	 * Temporary failures, where the same entry should be tried
11905878303cSChengming Zhou 	 * again immediately, almost never happen for this shrinker.
11915878303cSChengming Zhou 	 * We don't do any trylocking; -ENOMEM comes closest,
11925878303cSChengming Zhou 	 * but that's extremely rare and doesn't happen spuriously
11935878303cSChengming Zhou 	 * either. Don't bother distinguishing this case.
11945878303cSChengming Zhou 	 *
11955878303cSChengming Zhou 	 * But since they do exist in theory, the entry cannot just
11965878303cSChengming Zhou 	 * be unlinked, or we could leak it. Hence, rotate.
11975878303cSChengming Zhou 	 */
11985878303cSChengming Zhou 	list_move_tail(item, &l->list);
11995878303cSChengming Zhou 
12005878303cSChengming Zhou 	/*
1201f999f38bSDomenico Cerasuolo 	 * Once the lru lock is dropped, the entry might get freed. The
12025878303cSChengming Zhou 	 * swpentry is copied to the stack, and entry isn't deref'd again
1203f999f38bSDomenico Cerasuolo 	 * until the entry is verified to still be alive in the tree.
1204f999f38bSDomenico Cerasuolo 	 */
12055878303cSChengming Zhou 	swpentry = entry->swpentry;
12065878303cSChengming Zhou 
1207a65b0e76SDomenico Cerasuolo 	/*
1208a65b0e76SDomenico Cerasuolo 	 * It's safe to drop the lock here because we return either
1209a65b0e76SDomenico Cerasuolo 	 * LRU_REMOVED_RETRY or LRU_RETRY.
1210a65b0e76SDomenico Cerasuolo 	 */
1211a65b0e76SDomenico Cerasuolo 	spin_unlock(lock);
1212f999f38bSDomenico Cerasuolo 
12135878303cSChengming Zhou 	writeback_result = zswap_writeback_entry(entry, swpentry);
1214a65b0e76SDomenico Cerasuolo 
1215a65b0e76SDomenico Cerasuolo 	if (writeback_result) {
1216a65b0e76SDomenico Cerasuolo 		zswap_reject_reclaim_fail++;
1217a65b0e76SDomenico Cerasuolo 		ret = LRU_RETRY;
1218b5ba474fSNhat Pham 
1219b5ba474fSNhat Pham 		/*
1220b5ba474fSNhat Pham 		 * Encountering a page already in swap cache is a sign that we are shrinking
1221b5ba474fSNhat Pham 		 * into the warmer region. We should terminate shrinking (if we're in the dynamic
1222b5ba474fSNhat Pham 		 * shrinker context).
1223b5ba474fSNhat Pham 		 */
122427d3969bSChengming Zhou 		if (writeback_result == -EEXIST && encountered_page_in_swapcache)
1225b5ba474fSNhat Pham 			*encountered_page_in_swapcache = true;
12265878303cSChengming Zhou 	} else {
1227a65b0e76SDomenico Cerasuolo 		zswap_written_back_pages++;
12285878303cSChengming Zhou 	}
1229f999f38bSDomenico Cerasuolo 
1230a65b0e76SDomenico Cerasuolo 	spin_lock(lock);
1231a65b0e76SDomenico Cerasuolo 	return ret;
1232a65b0e76SDomenico Cerasuolo }
1233a65b0e76SDomenico Cerasuolo 
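/*
 * Try to write back one zswap entry per NUMA node from the memcg's
 * LRUs. Returns 0 if anything was reclaimed, a negative errno
 * otherwise.
 */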
1234a65b0e76SDomenico Cerasuolo static int shrink_memcg(struct mem_cgroup *memcg)
1235a65b0e76SDomenico Cerasuolo {
1236a65b0e76SDomenico Cerasuolo 	struct zswap_pool *pool;
1237a65b0e76SDomenico Cerasuolo 	int nid, shrunk = 0;
1238a65b0e76SDomenico Cerasuolo 
1239501a06feSNhat Pham 	if (!mem_cgroup_zswap_writeback_enabled(memcg))
1240501a06feSNhat Pham 		return -EINVAL;
1241501a06feSNhat Pham 
1242a65b0e76SDomenico Cerasuolo 	/*
1243a65b0e76SDomenico Cerasuolo 	 * Skip zombies because their LRUs are reparented and we would be
1244a65b0e76SDomenico Cerasuolo 	 * reclaiming from the parent instead of the dead memcg.
1245a65b0e76SDomenico Cerasuolo 	 */
1246a65b0e76SDomenico Cerasuolo 	if (memcg && !mem_cgroup_online(memcg))
1247a65b0e76SDomenico Cerasuolo 		return -ENOENT;
1248a65b0e76SDomenico Cerasuolo 
1249a65b0e76SDomenico Cerasuolo 	pool = zswap_pool_current_get();
1250a65b0e76SDomenico Cerasuolo 	if (!pool)
1251a65b0e76SDomenico Cerasuolo 		return -EINVAL;
1252a65b0e76SDomenico Cerasuolo 
1253a65b0e76SDomenico Cerasuolo 	for_each_node_state(nid, N_NORMAL_MEMORY) {
1254a65b0e76SDomenico Cerasuolo 		unsigned long nr_to_walk = 1;
1255a65b0e76SDomenico Cerasuolo 
1256a65b0e76SDomenico Cerasuolo 		shrunk += list_lru_walk_one(&pool->list_lru, nid, memcg,
1257a65b0e76SDomenico Cerasuolo 					    &shrink_memcg_cb, NULL, &nr_to_walk);
1258a65b0e76SDomenico Cerasuolo 	}
1259a65b0e76SDomenico Cerasuolo 	zswap_pool_put(pool);
1260a65b0e76SDomenico Cerasuolo 	return shrunk ? 0 : -EAGAIN;
1261f999f38bSDomenico Cerasuolo }
1262f999f38bSDomenico Cerasuolo 
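/*
 * Background reclaim worker, queued when the pool limit is hit:
 * write back entries one memcg at a time until the pool can accept
 * new pages again or MAX_RECLAIM_RETRIES failures accumulate.
 */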
126345190f01SVitaly Wool static void shrink_worker(struct work_struct *w)
126445190f01SVitaly Wool {
126545190f01SVitaly Wool 	struct zswap_pool *pool = container_of(w, typeof(*pool),
126645190f01SVitaly Wool 						shrink_work);
1267a65b0e76SDomenico Cerasuolo 	struct mem_cgroup *memcg;
1268e0228d59SDomenico Cerasuolo 	int ret, failures = 0;
126945190f01SVitaly Wool 
1270a65b0e76SDomenico Cerasuolo 	/* Global reclaim selects cgroups in a round-robin fashion. */
1271e0228d59SDomenico Cerasuolo 	do {
1272a65b0e76SDomenico Cerasuolo 		spin_lock(&zswap_pools_lock);
1273a65b0e76SDomenico Cerasuolo 		pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
1274a65b0e76SDomenico Cerasuolo 		memcg = pool->next_shrink;
1275a65b0e76SDomenico Cerasuolo 
1276a65b0e76SDomenico Cerasuolo 		/*
1277a65b0e76SDomenico Cerasuolo 		 * We need to retry if we have gone through a full round trip, or if we
1278a65b0e76SDomenico Cerasuolo 		 * got an offline memcg (or else we risk undoing the effect of the
1279a65b0e76SDomenico Cerasuolo 		 * zswap memcg offlining cleanup callback). This is not catastrophic
1280a65b0e76SDomenico Cerasuolo 		 * per se, but it will keep the now offlined memcg hostage for a while.
1281a65b0e76SDomenico Cerasuolo 		 *
1282a65b0e76SDomenico Cerasuolo 		 * Note that if we got an online memcg, we will keep the extra
1283a65b0e76SDomenico Cerasuolo 		 * reference in case the original reference obtained by mem_cgroup_iter
1284a65b0e76SDomenico Cerasuolo 		 * is dropped by the zswap memcg offlining callback, ensuring that the
1285a65b0e76SDomenico Cerasuolo 		 * memcg is not killed when we are reclaiming.
1286a65b0e76SDomenico Cerasuolo 		 */
1287a65b0e76SDomenico Cerasuolo 		if (!memcg) {
1288a65b0e76SDomenico Cerasuolo 			spin_unlock(&zswap_pools_lock);
1289e0228d59SDomenico Cerasuolo 			if (++failures == MAX_RECLAIM_RETRIES)
1290e0228d59SDomenico Cerasuolo 				break;
1291a65b0e76SDomenico Cerasuolo 
1292a65b0e76SDomenico Cerasuolo 			goto resched;
1293e0228d59SDomenico Cerasuolo 		}
1294a65b0e76SDomenico Cerasuolo 
1295a65b0e76SDomenico Cerasuolo 		if (!mem_cgroup_tryget_online(memcg)) {
1296a65b0e76SDomenico Cerasuolo 			/* drop the reference from mem_cgroup_iter() */
1297a65b0e76SDomenico Cerasuolo 			mem_cgroup_iter_break(NULL, memcg);
1298a65b0e76SDomenico Cerasuolo 			pool->next_shrink = NULL;
1299a65b0e76SDomenico Cerasuolo 			spin_unlock(&zswap_pools_lock);
1300a65b0e76SDomenico Cerasuolo 
1301a65b0e76SDomenico Cerasuolo 			if (++failures == MAX_RECLAIM_RETRIES)
1302a65b0e76SDomenico Cerasuolo 				break;
1303a65b0e76SDomenico Cerasuolo 
1304a65b0e76SDomenico Cerasuolo 			goto resched;
1305a65b0e76SDomenico Cerasuolo 		}
1306a65b0e76SDomenico Cerasuolo 		spin_unlock(&zswap_pools_lock);
1307a65b0e76SDomenico Cerasuolo 
1308a65b0e76SDomenico Cerasuolo 		ret = shrink_memcg(memcg);
1309a65b0e76SDomenico Cerasuolo 		/* drop the extra reference */
1310a65b0e76SDomenico Cerasuolo 		mem_cgroup_put(memcg);
1311a65b0e76SDomenico Cerasuolo 
1312a65b0e76SDomenico Cerasuolo 		if (ret == -EINVAL)
1313a65b0e76SDomenico Cerasuolo 			break;
1314a65b0e76SDomenico Cerasuolo 		if (ret && ++failures == MAX_RECLAIM_RETRIES)
1315a65b0e76SDomenico Cerasuolo 			break;
1316a65b0e76SDomenico Cerasuolo 
1317a65b0e76SDomenico Cerasuolo resched:
1318e0228d59SDomenico Cerasuolo 		cond_resched();
1319e0228d59SDomenico Cerasuolo 	} while (!zswap_can_accept());
132045190f01SVitaly Wool 	zswap_pool_put(pool);
132145190f01SVitaly Wool }
132245190f01SVitaly Wool 
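/*
 * Compress the folio into the per-CPU scratch buffer, then copy the
 * result into a newly allocated zpool handle. On success, returns
 * true with entry->handle and entry->length set.
 */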
1323fa9ad6e2SJohannes Weiner static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
1324fa9ad6e2SJohannes Weiner {
1325fa9ad6e2SJohannes Weiner 	struct crypto_acomp_ctx *acomp_ctx;
1326fa9ad6e2SJohannes Weiner 	struct scatterlist input, output;
1327fa9ad6e2SJohannes Weiner 	unsigned int dlen = PAGE_SIZE;
1328fa9ad6e2SJohannes Weiner 	unsigned long handle;
1329fa9ad6e2SJohannes Weiner 	struct zpool *zpool;
1330fa9ad6e2SJohannes Weiner 	char *buf;
1331fa9ad6e2SJohannes Weiner 	gfp_t gfp;
1332fa9ad6e2SJohannes Weiner 	int ret;
1333fa9ad6e2SJohannes Weiner 	u8 *dst;
1334fa9ad6e2SJohannes Weiner 
1335fa9ad6e2SJohannes Weiner 	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1336fa9ad6e2SJohannes Weiner 
1337fa9ad6e2SJohannes Weiner 	mutex_lock(&acomp_ctx->mutex);
1338fa9ad6e2SJohannes Weiner 
1339fa9ad6e2SJohannes Weiner 	dst = acomp_ctx->buffer;
1340fa9ad6e2SJohannes Weiner 	sg_init_table(&input, 1);
1341fa9ad6e2SJohannes Weiner 	sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
1342fa9ad6e2SJohannes Weiner 
1343fa9ad6e2SJohannes Weiner 	/*
1344fa9ad6e2SJohannes Weiner 	 * We need PAGE_SIZE * 2 here since compression may occasionally
1345fa9ad6e2SJohannes Weiner 	 * expand the data, and hardware accelerators may not check the dst
1346fa9ad6e2SJohannes Weiner 	 * buffer size; give the dst buffer enough room to avoid an overflow.
1347fa9ad6e2SJohannes Weiner 	 */
1348fa9ad6e2SJohannes Weiner 	sg_init_one(&output, dst, PAGE_SIZE * 2);
1349fa9ad6e2SJohannes Weiner 	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
1350fa9ad6e2SJohannes Weiner 
1351fa9ad6e2SJohannes Weiner 	/*
1352fa9ad6e2SJohannes Weiner 	 * It may look silly to submit an asynchronous request and then wait
1353fa9ad6e2SJohannes Weiner 	 * for its completion synchronously; in effect, the operation is
1354fa9ad6e2SJohannes Weiner 	 * synchronous.
1355fa9ad6e2SJohannes Weiner 	 * In theory, acomp lets users submit multiple requests on a single
1356fa9ad6e2SJohannes Weiner 	 * acomp instance and have them complete concurrently. But zswap
1357fa9ad6e2SJohannes Weiner 	 * stores and loads page by page, so a single thread doing zswap has
1358fa9ad6e2SJohannes Weiner 	 * no way to submit a second page before the first one is done.
1359fa9ad6e2SJohannes Weiner 	 * Threads on different CPUs use different acomp instances, though,
1360fa9ad6e2SJohannes Weiner 	 * so multiple threads can do (de)compression in parallel.
1362fa9ad6e2SJohannes Weiner 	 */
1363fa9ad6e2SJohannes Weiner 	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
1364fa9ad6e2SJohannes Weiner 	dlen = acomp_ctx->req->dlen;
1365fa9ad6e2SJohannes Weiner 	if (ret) {
1366fa9ad6e2SJohannes Weiner 		zswap_reject_compress_fail++;
1367fa9ad6e2SJohannes Weiner 		goto unlock;
1368fa9ad6e2SJohannes Weiner 	}
1369fa9ad6e2SJohannes Weiner 
1370fa9ad6e2SJohannes Weiner 	zpool = zswap_find_zpool(entry);
1371fa9ad6e2SJohannes Weiner 	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1372fa9ad6e2SJohannes Weiner 	if (zpool_malloc_support_movable(zpool))
1373fa9ad6e2SJohannes Weiner 		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1374fa9ad6e2SJohannes Weiner 	ret = zpool_malloc(zpool, dlen, gfp, &handle);
1375fa9ad6e2SJohannes Weiner 	if (ret == -ENOSPC) {
1376fa9ad6e2SJohannes Weiner 		zswap_reject_compress_poor++;
1377fa9ad6e2SJohannes Weiner 		goto unlock;
1378fa9ad6e2SJohannes Weiner 	}
1379fa9ad6e2SJohannes Weiner 	if (ret) {
1380fa9ad6e2SJohannes Weiner 		zswap_reject_alloc_fail++;
1381fa9ad6e2SJohannes Weiner 		goto unlock;
1382fa9ad6e2SJohannes Weiner 	}
1383fa9ad6e2SJohannes Weiner 
1384fa9ad6e2SJohannes Weiner 	buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
1385fa9ad6e2SJohannes Weiner 	memcpy(buf, dst, dlen);
1386fa9ad6e2SJohannes Weiner 	zpool_unmap_handle(zpool, handle);
1387fa9ad6e2SJohannes Weiner 
1388fa9ad6e2SJohannes Weiner 	entry->handle = handle;
1389fa9ad6e2SJohannes Weiner 	entry->length = dlen;
1390fa9ad6e2SJohannes Weiner 
1391fa9ad6e2SJohannes Weiner unlock:
1392fa9ad6e2SJohannes Weiner 	mutex_unlock(&acomp_ctx->mutex);
1393fa9ad6e2SJohannes Weiner 	return ret == 0;
1394fa9ad6e2SJohannes Weiner }
1395fa9ad6e2SJohannes Weiner 
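/*
 * Decompress the entry's data into @page. If the zpool mapping cannot
 * be held while sleeping, the compressed data is first copied into the
 * per-CPU buffer and the handle is unmapped early.
 */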
1396ff2972aaSJohannes Weiner static void zswap_decompress(struct zswap_entry *entry, struct page *page)
139732acba4cSChengming Zhou {
139832acba4cSChengming Zhou 	struct zpool *zpool = zswap_find_zpool(entry);
139932acba4cSChengming Zhou 	struct scatterlist input, output;
140032acba4cSChengming Zhou 	struct crypto_acomp_ctx *acomp_ctx;
140132acba4cSChengming Zhou 	u8 *src;
140232acba4cSChengming Zhou 
140332acba4cSChengming Zhou 	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
14048ba2f844SChengming Zhou 	mutex_lock(&acomp_ctx->mutex);
140532acba4cSChengming Zhou 
140632acba4cSChengming Zhou 	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
140732acba4cSChengming Zhou 	if (!zpool_can_sleep_mapped(zpool)) {
14088ba2f844SChengming Zhou 		memcpy(acomp_ctx->buffer, src, entry->length);
14098ba2f844SChengming Zhou 		src = acomp_ctx->buffer;
141032acba4cSChengming Zhou 		zpool_unmap_handle(zpool, entry->handle);
141132acba4cSChengming Zhou 	}
141232acba4cSChengming Zhou 
141332acba4cSChengming Zhou 	sg_init_one(&input, src, entry->length);
141432acba4cSChengming Zhou 	sg_init_table(&output, 1);
141532acba4cSChengming Zhou 	sg_set_page(&output, page, PAGE_SIZE, 0);
141632acba4cSChengming Zhou 	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
141732acba4cSChengming Zhou 	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
141832acba4cSChengming Zhou 	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
14198ba2f844SChengming Zhou 	mutex_unlock(&acomp_ctx->mutex);
142032acba4cSChengming Zhou 
142132acba4cSChengming Zhou 	if (zpool_can_sleep_mapped(zpool))
142232acba4cSChengming Zhou 		zpool_unmap_handle(zpool, entry->handle);
142332acba4cSChengming Zhou }
142432acba4cSChengming Zhou 
142590b0fc26SDan Streetman /*********************************
14262b281117SSeth Jennings * writeback code
14272b281117SSeth Jennings **********************************/
14282b281117SSeth Jennings /*
142996c7b0b4SMatthew Wilcox (Oracle)  * Attempts to free an entry by adding a folio to the swap cache,
143096c7b0b4SMatthew Wilcox (Oracle)  * decompressing the entry data into the folio, and issuing a
143196c7b0b4SMatthew Wilcox (Oracle)  * bio write to write the folio back to the swap device.
14322b281117SSeth Jennings  *
143396c7b0b4SMatthew Wilcox (Oracle)  * This can be thought of as a "resumed writeback" of the folio
14342b281117SSeth Jennings  * to the swap device.  We are basically resuming the same swap
143542c06a0eSJohannes Weiner  * writeback path that was intercepted by zswap_store()
143696c7b0b4SMatthew Wilcox (Oracle)  * in the first place.  After the folio has been decompressed into
14372b281117SSeth Jennings  * the swap cache, the compressed version stored by zswap can be
14382b281117SSeth Jennings  * freed.
14392b281117SSeth Jennings  */
14400bb48849SDomenico Cerasuolo static int zswap_writeback_entry(struct zswap_entry *entry,
14415878303cSChengming Zhou 				 swp_entry_t swpentry)
14422b281117SSeth Jennings {
14435878303cSChengming Zhou 	struct zswap_tree *tree;
144496c7b0b4SMatthew Wilcox (Oracle) 	struct folio *folio;
1445ddc1a5cbSHugh Dickins 	struct mempolicy *mpol;
144696c7b0b4SMatthew Wilcox (Oracle) 	bool folio_was_allocated;
14472b281117SSeth Jennings 	struct writeback_control wbc = {
14482b281117SSeth Jennings 		.sync_mode = WB_SYNC_NONE,
14492b281117SSeth Jennings 	};
14502b281117SSeth Jennings 
145196c7b0b4SMatthew Wilcox (Oracle) 	/* try to allocate swap cache folio */
1452ddc1a5cbSHugh Dickins 	mpol = get_task_policy(current);
145396c7b0b4SMatthew Wilcox (Oracle) 	folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
145496c7b0b4SMatthew Wilcox (Oracle) 				NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
145596c7b0b4SMatthew Wilcox (Oracle) 	if (!folio)
1456e947ba0bSChengming Zhou 		return -ENOMEM;
14572b281117SSeth Jennings 
1458e947ba0bSChengming Zhou 	/*
14595878303cSChengming Zhou 	 * Found an existing folio: we raced with swapin or a concurrent
14605878303cSChengming Zhou 	 * shrinker. We generally write back cold folios from zswap, and
14615878303cSChengming Zhou 	 * swapin means the folio just became hot, so skip this folio.
14625878303cSChengming Zhou 	 * In the unlikely concurrent shrinker case, the entry will be
14635878303cSChengming Zhou 	 * unlinked and freed when invalidated by that shrinker anyway.
1464e947ba0bSChengming Zhou 	 */
146596c7b0b4SMatthew Wilcox (Oracle) 	if (!folio_was_allocated) {
146696c7b0b4SMatthew Wilcox (Oracle) 		folio_put(folio);
1467e947ba0bSChengming Zhou 		return -EEXIST;
146898804a94SJohannes Weiner 	}
14692b281117SSeth Jennings 
147004fc7816SDomenico Cerasuolo 	/*
147196c7b0b4SMatthew Wilcox (Oracle) 	 * folio is locked, and the swapcache is now secured against
147298804a94SJohannes Weiner 	 * concurrent swapping to and from the slot. Verify that the
147398804a94SJohannes Weiner 	 * swap entry hasn't been invalidated and recycled behind our
147498804a94SJohannes Weiner 	 * backs (our zswap_entry reference doesn't prevent that), to
147596c7b0b4SMatthew Wilcox (Oracle) 	 * avoid overwriting a new swap folio with old compressed data.
147604fc7816SDomenico Cerasuolo 	 */
14775878303cSChengming Zhou 	tree = swap_zswap_tree(swpentry);
147804fc7816SDomenico Cerasuolo 	spin_lock(&tree->lock);
14795878303cSChengming Zhou 	if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
148004fc7816SDomenico Cerasuolo 		spin_unlock(&tree->lock);
148196c7b0b4SMatthew Wilcox (Oracle) 		delete_from_swap_cache(folio);
1482e3b63e96SYosry Ahmed 		folio_unlock(folio);
1483e3b63e96SYosry Ahmed 		folio_put(folio);
1484e947ba0bSChengming Zhou 		return -ENOMEM;
148504fc7816SDomenico Cerasuolo 	}
14865878303cSChengming Zhou 
14875878303cSChengming Zhou 	/* Safe to deref entry after the entry is verified above. */
14885878303cSChengming Zhou 	zswap_entry_get(entry);
148904fc7816SDomenico Cerasuolo 	spin_unlock(&tree->lock);
149004fc7816SDomenico Cerasuolo 
1491ff2972aaSJohannes Weiner 	zswap_decompress(entry, &folio->page);
14922b281117SSeth Jennings 
14935878303cSChengming Zhou 	count_vm_event(ZSWPWB);
14945878303cSChengming Zhou 	if (entry->objcg)
14955878303cSChengming Zhou 		count_objcg_event(entry->objcg, ZSWPWB);
14965878303cSChengming Zhou 
14975878303cSChengming Zhou 	spin_lock(&tree->lock);
14985878303cSChengming Zhou 	zswap_invalidate_entry(tree, entry);
14995878303cSChengming Zhou 	zswap_entry_put(entry);
15005878303cSChengming Zhou 	spin_unlock(&tree->lock);
15015878303cSChengming Zhou 
150296c7b0b4SMatthew Wilcox (Oracle) 	/* folio is up to date */
150396c7b0b4SMatthew Wilcox (Oracle) 	folio_mark_uptodate(folio);
15042b281117SSeth Jennings 
1505b349acc7SWeijie Yang 	/* move it to the tail of the inactive list after end_writeback */
150696c7b0b4SMatthew Wilcox (Oracle) 	folio_set_reclaim(folio);
1507b349acc7SWeijie Yang 
15082b281117SSeth Jennings 	/* start writeback */
1509b99b4e0dSMatthew Wilcox (Oracle) 	__swap_writepage(folio, &wbc);
151096c7b0b4SMatthew Wilcox (Oracle) 	folio_put(folio);
15112b281117SSeth Jennings 
1512e947ba0bSChengming Zhou 	return 0;
15132b281117SSeth Jennings }
15142b281117SSeth Jennings 
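/*
 * Detect a page consisting of a single repeated machine word (e.g. a
 * zero-filled page): store that word in @value and return 1. The last
 * word is compared first as a cheap early reject for pages that only
 * begin with a repeating pattern.
 */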
1515a85f878bSSrividya Desireddy static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
1516a85f878bSSrividya Desireddy {
1517a85f878bSSrividya Desireddy 	unsigned long *page;
151862bf1258STaejoon Song 	unsigned long val;
151962bf1258STaejoon Song 	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
1520a85f878bSSrividya Desireddy 
1521a85f878bSSrividya Desireddy 	page = (unsigned long *)ptr;
152262bf1258STaejoon Song 	val = page[0];
152362bf1258STaejoon Song 
152462bf1258STaejoon Song 	if (val != page[last_pos])
152562bf1258STaejoon Song 		return 0;
152662bf1258STaejoon Song 
152762bf1258STaejoon Song 	for (pos = 1; pos < last_pos; pos++) {
152862bf1258STaejoon Song 		if (val != page[pos])
1529a85f878bSSrividya Desireddy 			return 0;
1530a85f878bSSrividya Desireddy 	}
153162bf1258STaejoon Song 
153262bf1258STaejoon Song 	*value = val;
153362bf1258STaejoon Song 
1534a85f878bSSrividya Desireddy 	return 1;
1535a85f878bSSrividya Desireddy }
1536a85f878bSSrividya Desireddy 
1537a85f878bSSrividya Desireddy static void zswap_fill_page(void *ptr, unsigned long value)
1538a85f878bSSrividya Desireddy {
1539a85f878bSSrividya Desireddy 	unsigned long *page;
1540a85f878bSSrividya Desireddy 
1541a85f878bSSrividya Desireddy 	page = (unsigned long *)ptr;
1542a85f878bSSrividya Desireddy 	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
1543a85f878bSSrividya Desireddy }
1544a85f878bSSrividya Desireddy 
154534f4c198SMatthew Wilcox (Oracle) bool zswap_store(struct folio *folio)
15462b281117SSeth Jennings {
15473d2c9087SDavid Hildenbrand 	swp_entry_t swp = folio->swap;
154842c06a0eSJohannes Weiner 	pgoff_t offset = swp_offset(swp);
154944c7c734SChengming Zhou 	struct zswap_tree *tree = swap_zswap_tree(swp);
15502b281117SSeth Jennings 	struct zswap_entry *entry, *dupentry;
1551f4840ccfSJohannes Weiner 	struct obj_cgroup *objcg = NULL;
1552a65b0e76SDomenico Cerasuolo 	struct mem_cgroup *memcg = NULL;
1553be7fc97cSJohannes Weiner 	struct zswap_pool *shrink_pool;
155442c06a0eSJohannes Weiner 
155534f4c198SMatthew Wilcox (Oracle) 	VM_WARN_ON_ONCE(!folio_test_locked(folio));
155634f4c198SMatthew Wilcox (Oracle) 	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
15572b281117SSeth Jennings 
155834f4c198SMatthew Wilcox (Oracle) 	/* Large folios aren't supported */
155934f4c198SMatthew Wilcox (Oracle) 	if (folio_test_large(folio))
156042c06a0eSJohannes Weiner 		return false;
15617ba71669SHuang Ying 
15620bdf0efaSNhat Pham 	/*
1563ca56489cSDomenico Cerasuolo 	 * If this is a duplicate, it must be removed before attempting to store
1564ca56489cSDomenico Cerasuolo 	 * it; otherwise, if the store fails, the old page won't be removed from
1565ca56489cSDomenico Cerasuolo 	 * the tree and might be written back, overwriting the new data.
1566ca56489cSDomenico Cerasuolo 	 */
1567ca56489cSDomenico Cerasuolo 	spin_lock(&tree->lock);
1568be7fc97cSJohannes Weiner 	entry = zswap_rb_search(&tree->rbroot, offset);
1569be7fc97cSJohannes Weiner 	if (entry) {
1570be7fc97cSJohannes Weiner 		zswap_invalidate_entry(tree, entry);
1571ca56489cSDomenico Cerasuolo 		zswap_duplicate_entry++;
1572ca56489cSDomenico Cerasuolo 	}
1573ca56489cSDomenico Cerasuolo 	spin_unlock(&tree->lock);
1574678e54d4SChengming Zhou 
1575678e54d4SChengming Zhou 	if (!zswap_enabled)
1576678e54d4SChengming Zhou 		return false;
1577678e54d4SChengming Zhou 
1578074e3e26SMatthew Wilcox (Oracle) 	objcg = get_obj_cgroup_from_folio(folio);
1579a65b0e76SDomenico Cerasuolo 	if (objcg && !obj_cgroup_may_zswap(objcg)) {
1580a65b0e76SDomenico Cerasuolo 		memcg = get_mem_cgroup_from_objcg(objcg);
1581a65b0e76SDomenico Cerasuolo 		if (shrink_memcg(memcg)) {
1582a65b0e76SDomenico Cerasuolo 			mem_cgroup_put(memcg);
15830bdf0efaSNhat Pham 			goto reject;
1584a65b0e76SDomenico Cerasuolo 		}
1585a65b0e76SDomenico Cerasuolo 		mem_cgroup_put(memcg);
1586a65b0e76SDomenico Cerasuolo 	}
1587f4840ccfSJohannes Weiner 
15882b281117SSeth Jennings 	/* reclaim space if needed */
15892b281117SSeth Jennings 	if (zswap_is_full()) {
15902b281117SSeth Jennings 		zswap_pool_limit_hit++;
159145190f01SVitaly Wool 		zswap_pool_reached_full = true;
1592f4840ccfSJohannes Weiner 		goto shrink;
15932b281117SSeth Jennings 	}
159416e536efSLi Wang 
159545190f01SVitaly Wool 	if (zswap_pool_reached_full) {
159642c06a0eSJohannes Weiner 		if (!zswap_can_accept())
1597e0228d59SDomenico Cerasuolo 			goto shrink;
159842c06a0eSJohannes Weiner 		else
159945190f01SVitaly Wool 			zswap_pool_reached_full = false;
16002b281117SSeth Jennings 	}
16012b281117SSeth Jennings 
16022b281117SSeth Jennings 	/* allocate entry */
1603be7fc97cSJohannes Weiner 	entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
16042b281117SSeth Jennings 	if (!entry) {
16052b281117SSeth Jennings 		zswap_reject_kmemcache_fail++;
16062b281117SSeth Jennings 		goto reject;
16072b281117SSeth Jennings 	}
16082b281117SSeth Jennings 
1609a85f878bSSrividya Desireddy 	if (zswap_same_filled_pages_enabled) {
1610be7fc97cSJohannes Weiner 		unsigned long value;
1611be7fc97cSJohannes Weiner 		u8 *src;
1612be7fc97cSJohannes Weiner 
1613be7fc97cSJohannes Weiner 		src = kmap_local_folio(folio, 0);
1614a85f878bSSrividya Desireddy 		if (zswap_is_page_same_filled(src, &value)) {
1615003ae2fbSFabio M. De Francesco 			kunmap_local(src);
1616a85f878bSSrividya Desireddy 			entry->length = 0;
1617a85f878bSSrividya Desireddy 			entry->value = value;
1618a85f878bSSrividya Desireddy 			atomic_inc(&zswap_same_filled_pages);
1619a85f878bSSrividya Desireddy 			goto insert_entry;
1620a85f878bSSrividya Desireddy 		}
1621003ae2fbSFabio M. De Francesco 		kunmap_local(src);
1622a85f878bSSrividya Desireddy 	}
1623a85f878bSSrividya Desireddy 
162442c06a0eSJohannes Weiner 	if (!zswap_non_same_filled_pages_enabled)
1625cb325dddSMaciej S. Szmigiero 		goto freepage;
1626cb325dddSMaciej S. Szmigiero 
1627f1c54846SDan Streetman 	/* if entry is successfully added, it keeps the reference */
1628f1c54846SDan Streetman 	entry->pool = zswap_pool_current_get();
162942c06a0eSJohannes Weiner 	if (!entry->pool)
16302b281117SSeth Jennings 		goto freepage;
16312b281117SSeth Jennings 
1632a65b0e76SDomenico Cerasuolo 	if (objcg) {
1633a65b0e76SDomenico Cerasuolo 		memcg = get_mem_cgroup_from_objcg(objcg);
1634a65b0e76SDomenico Cerasuolo 		if (memcg_list_lru_alloc(memcg, &entry->pool->list_lru, GFP_KERNEL)) {
1635a65b0e76SDomenico Cerasuolo 			mem_cgroup_put(memcg);
1636a65b0e76SDomenico Cerasuolo 			goto put_pool;
1637a65b0e76SDomenico Cerasuolo 		}
1638a65b0e76SDomenico Cerasuolo 		mem_cgroup_put(memcg);
1639a65b0e76SDomenico Cerasuolo 	}
1640a65b0e76SDomenico Cerasuolo 
1641fa9ad6e2SJohannes Weiner 	if (!zswap_compress(folio, entry))
1642fa9ad6e2SJohannes Weiner 		goto put_pool;
16431ec3b5feSBarry Song 
1644a85f878bSSrividya Desireddy insert_entry:
1645be7fc97cSJohannes Weiner 	entry->swpentry = swp;
1646f4840ccfSJohannes Weiner 	entry->objcg = objcg;
1647f4840ccfSJohannes Weiner 	if (objcg) {
1648f4840ccfSJohannes Weiner 		obj_cgroup_charge_zswap(objcg, entry->length);
1649f4840ccfSJohannes Weiner 		/* Account before objcg ref is moved to tree */
1650f4840ccfSJohannes Weiner 		count_objcg_event(objcg, ZSWPOUT);
1651f4840ccfSJohannes Weiner 	}
1652f4840ccfSJohannes Weiner 
16532b281117SSeth Jennings 	/* map */
16542b281117SSeth Jennings 	spin_lock(&tree->lock);
1655ca56489cSDomenico Cerasuolo 	/*
1656ca56489cSDomenico Cerasuolo 	 * A duplicate entry should have been removed at the beginning of this
1657ca56489cSDomenico Cerasuolo 	 * function. Since the swap entry should be pinned, if a duplicate is
1658ca56489cSDomenico Cerasuolo 	 * found again here it means that something went wrong in the swap
1659ca56489cSDomenico Cerasuolo 	 * cache.
1660ca56489cSDomenico Cerasuolo 	 */
166142c06a0eSJohannes Weiner 	while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
1662ca56489cSDomenico Cerasuolo 		WARN_ON(1);
16632b281117SSeth Jennings 		zswap_duplicate_entry++;
166456c67049SJohannes Weiner 		zswap_invalidate_entry(tree, dupentry);
16652b281117SSeth Jennings 	}
166635499e2bSDomenico Cerasuolo 	if (entry->length) {
1667a65b0e76SDomenico Cerasuolo 		INIT_LIST_HEAD(&entry->lru);
1668a65b0e76SDomenico Cerasuolo 		zswap_lru_add(&entry->pool->list_lru, entry);
1669b5ba474fSNhat Pham 		atomic_inc(&entry->pool->nr_stored);
1670f999f38bSDomenico Cerasuolo 	}
16712b281117SSeth Jennings 	spin_unlock(&tree->lock);
16722b281117SSeth Jennings 
16732b281117SSeth Jennings 	/* update stats */
16742b281117SSeth Jennings 	atomic_inc(&zswap_stored_pages);
1675f1c54846SDan Streetman 	zswap_update_total_size();
1676f6498b77SJohannes Weiner 	count_vm_event(ZSWPOUT);
16772b281117SSeth Jennings 
167842c06a0eSJohannes Weiner 	return true;
16792b281117SSeth Jennings 
1680a65b0e76SDomenico Cerasuolo put_pool:
1681f1c54846SDan Streetman 	zswap_pool_put(entry->pool);
1682f1c54846SDan Streetman freepage:
16832b281117SSeth Jennings 	zswap_entry_cache_free(entry);
16842b281117SSeth Jennings reject:
1685f4840ccfSJohannes Weiner 	if (objcg)
1686f4840ccfSJohannes Weiner 		obj_cgroup_put(objcg);
168742c06a0eSJohannes Weiner 	return false;
1688f4840ccfSJohannes Weiner 
1689f4840ccfSJohannes Weiner shrink:
1690be7fc97cSJohannes Weiner 	shrink_pool = zswap_pool_last_get();
1691be7fc97cSJohannes Weiner 	if (shrink_pool && !queue_work(shrink_wq, &shrink_pool->shrink_work))
1692be7fc97cSJohannes Weiner 		zswap_pool_put(shrink_pool);
1693f4840ccfSJohannes Weiner 	goto reject;
16942b281117SSeth Jennings }
16952b281117SSeth Jennings 
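/*
 * Look up the folio's swap entry in the zswap tree and, if present,
 * decompress (or same-value fill) the data into the folio. Returns
 * true if zswap held the data and the folio is now populated.
 */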
1696ca54f6d8SMatthew Wilcox (Oracle) bool zswap_load(struct folio *folio)
16972b281117SSeth Jennings {
16983d2c9087SDavid Hildenbrand 	swp_entry_t swp = folio->swap;
169942c06a0eSJohannes Weiner 	pgoff_t offset = swp_offset(swp);
1700ca54f6d8SMatthew Wilcox (Oracle) 	struct page *page = &folio->page;
170144c7c734SChengming Zhou 	struct zswap_tree *tree = swap_zswap_tree(swp);
17022b281117SSeth Jennings 	struct zswap_entry *entry;
170332acba4cSChengming Zhou 	u8 *dst;
170442c06a0eSJohannes Weiner 
1705ca54f6d8SMatthew Wilcox (Oracle) 	VM_WARN_ON_ONCE(!folio_test_locked(folio));
17062b281117SSeth Jennings 
17072b281117SSeth Jennings 	spin_lock(&tree->lock);
17085b297f70SJohannes Weiner 	entry = zswap_rb_search(&tree->rbroot, offset);
17092b281117SSeth Jennings 	if (!entry) {
17102b281117SSeth Jennings 		spin_unlock(&tree->lock);
171142c06a0eSJohannes Weiner 		return false;
17122b281117SSeth Jennings 	}
17135b297f70SJohannes Weiner 	zswap_entry_get(entry);
17142b281117SSeth Jennings 	spin_unlock(&tree->lock);
17152b281117SSeth Jennings 
171666447fd0SChengming Zhou 	if (entry->length)
1717ff2972aaSJohannes Weiner 		zswap_decompress(entry, page);
171866447fd0SChengming Zhou 	else {
1719003ae2fbSFabio M. De Francesco 		dst = kmap_local_page(page);
1720a85f878bSSrividya Desireddy 		zswap_fill_page(dst, entry->value);
1721003ae2fbSFabio M. De Francesco 		kunmap_local(dst);
1722a85f878bSSrividya Desireddy 	}
1723a85f878bSSrividya Desireddy 
1724f6498b77SJohannes Weiner 	count_vm_event(ZSWPIN);
1725f4840ccfSJohannes Weiner 	if (entry->objcg)
1726f4840ccfSJohannes Weiner 		count_objcg_event(entry->objcg, ZSWPIN);
1727c75f5c1eSChengming Zhou 
17282b281117SSeth Jennings 	spin_lock(&tree->lock);
172966447fd0SChengming Zhou 	if (zswap_exclusive_loads_enabled) {
1730b9c91c43SYosry Ahmed 		zswap_invalidate_entry(tree, entry);
1731ca54f6d8SMatthew Wilcox (Oracle) 		folio_mark_dirty(folio);
173235499e2bSDomenico Cerasuolo 	} else if (entry->length) {
1733a65b0e76SDomenico Cerasuolo 		zswap_lru_del(&entry->pool->list_lru, entry);
1734a65b0e76SDomenico Cerasuolo 		zswap_lru_add(&entry->pool->list_lru, entry);
1735b9c91c43SYosry Ahmed 	}
1736db128f5fSYosry Ahmed 	zswap_entry_put(entry);
17372b281117SSeth Jennings 	spin_unlock(&tree->lock);
17382b281117SSeth Jennings 
173966447fd0SChengming Zhou 	return true;
17402b281117SSeth Jennings }
17412b281117SSeth Jennings 
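/* Free the zswap entry, if any, backing the swap slot being freed. */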
174242c06a0eSJohannes Weiner void zswap_invalidate(int type, pgoff_t offset)
17432b281117SSeth Jennings {
174444c7c734SChengming Zhou 	struct zswap_tree *tree = swap_zswap_tree(swp_entry(type, offset));
17452b281117SSeth Jennings 	struct zswap_entry *entry;
17462b281117SSeth Jennings 
17472b281117SSeth Jennings 	spin_lock(&tree->lock);
17482b281117SSeth Jennings 	entry = zswap_rb_search(&tree->rbroot, offset);
174906ed2289SJohannes Weiner 	if (entry)
1750b9c91c43SYosry Ahmed 		zswap_invalidate_entry(tree, entry);
17512b281117SSeth Jennings 	spin_unlock(&tree->lock);
17522b281117SSeth Jennings }
17532b281117SSeth Jennings 
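/*
 * Allocate one zswap tree (rbtree + lock) per SWAP_ADDRESS_SPACE_PAGES
 * worth of swap slots, to reduce lock contention on large devices.
 */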
175444c7c734SChengming Zhou int zswap_swapon(int type, unsigned long nr_pages)
175542c06a0eSJohannes Weiner {
175644c7c734SChengming Zhou 	struct zswap_tree *trees, *tree;
175744c7c734SChengming Zhou 	unsigned int nr, i;
175842c06a0eSJohannes Weiner 
175944c7c734SChengming Zhou 	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
176044c7c734SChengming Zhou 	trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL);
176144c7c734SChengming Zhou 	if (!trees) {
176242c06a0eSJohannes Weiner 		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
1763bb29fd77SChengming Zhou 		return -ENOMEM;
176442c06a0eSJohannes Weiner 	}
176542c06a0eSJohannes Weiner 
176644c7c734SChengming Zhou 	for (i = 0; i < nr; i++) {
176744c7c734SChengming Zhou 		tree = trees + i;
176842c06a0eSJohannes Weiner 		tree->rbroot = RB_ROOT;
176942c06a0eSJohannes Weiner 		spin_lock_init(&tree->lock);
177044c7c734SChengming Zhou 	}
177144c7c734SChengming Zhou 
177244c7c734SChengming Zhou 	nr_zswap_trees[type] = nr;
177344c7c734SChengming Zhou 	zswap_trees[type] = trees;
1774bb29fd77SChengming Zhou 	return 0;
177542c06a0eSJohannes Weiner }
177642c06a0eSJohannes Weiner 
177742c06a0eSJohannes Weiner void zswap_swapoff(int type)
17782b281117SSeth Jennings {
177944c7c734SChengming Zhou 	struct zswap_tree *trees = zswap_trees[type];
178044c7c734SChengming Zhou 	unsigned int i;
17812b281117SSeth Jennings 
178244c7c734SChengming Zhou 	if (!trees)
17832b281117SSeth Jennings 		return;
17842b281117SSeth Jennings 
178583e68f25SYosry Ahmed 	/* try_to_unuse() invalidated all the entries already */
178683e68f25SYosry Ahmed 	for (i = 0; i < nr_zswap_trees[type]; i++)
178783e68f25SYosry Ahmed 		WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot));
178844c7c734SChengming Zhou 
178944c7c734SChengming Zhou 	kvfree(trees);
179044c7c734SChengming Zhou 	nr_zswap_trees[type] = 0;
1791aa9bca05SWeijie Yang 	zswap_trees[type] = NULL;
17922b281117SSeth Jennings }
17932b281117SSeth Jennings 
17942b281117SSeth Jennings /*********************************
17952b281117SSeth Jennings * debugfs functions
17962b281117SSeth Jennings **********************************/
17972b281117SSeth Jennings #ifdef CONFIG_DEBUG_FS
17982b281117SSeth Jennings #include <linux/debugfs.h>
17992b281117SSeth Jennings 
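/*
 * The counters below are exposed read-only under
 * /sys/kernel/debug/zswap/ when debugfs is mounted.
 */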
18002b281117SSeth Jennings static struct dentry *zswap_debugfs_root;
18012b281117SSeth Jennings 
1802141fdeecSLiu Shixin static int zswap_debugfs_init(void)
18032b281117SSeth Jennings {
18042b281117SSeth Jennings 	if (!debugfs_initialized())
18052b281117SSeth Jennings 		return -ENODEV;
18062b281117SSeth Jennings 
18072b281117SSeth Jennings 	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
18082b281117SSeth Jennings 
18090825a6f9SJoe Perches 	debugfs_create_u64("pool_limit_hit", 0444,
18102b281117SSeth Jennings 			   zswap_debugfs_root, &zswap_pool_limit_hit);
18110825a6f9SJoe Perches 	debugfs_create_u64("reject_reclaim_fail", 0444,
18122b281117SSeth Jennings 			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
18130825a6f9SJoe Perches 	debugfs_create_u64("reject_alloc_fail", 0444,
18142b281117SSeth Jennings 			   zswap_debugfs_root, &zswap_reject_alloc_fail);
18150825a6f9SJoe Perches 	debugfs_create_u64("reject_kmemcache_fail", 0444,
18162b281117SSeth Jennings 			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
1817cb61dad8SNhat Pham 	debugfs_create_u64("reject_compress_fail", 0444,
1818cb61dad8SNhat Pham 			   zswap_debugfs_root, &zswap_reject_compress_fail);
18190825a6f9SJoe Perches 	debugfs_create_u64("reject_compress_poor", 0444,
18202b281117SSeth Jennings 			   zswap_debugfs_root, &zswap_reject_compress_poor);
18210825a6f9SJoe Perches 	debugfs_create_u64("written_back_pages", 0444,
18222b281117SSeth Jennings 			   zswap_debugfs_root, &zswap_written_back_pages);
18230825a6f9SJoe Perches 	debugfs_create_u64("duplicate_entry", 0444,
18242b281117SSeth Jennings 			   zswap_debugfs_root, &zswap_duplicate_entry);
18250825a6f9SJoe Perches 	debugfs_create_u64("pool_total_size", 0444,
182612d79d64SDan Streetman 			   zswap_debugfs_root, &zswap_pool_total_size);
18270825a6f9SJoe Perches 	debugfs_create_atomic_t("stored_pages", 0444,
18282b281117SSeth Jennings 				zswap_debugfs_root, &zswap_stored_pages);
1829a85f878bSSrividya Desireddy 	debugfs_create_atomic_t("same_filled_pages", 0444,
1830a85f878bSSrividya Desireddy 				zswap_debugfs_root, &zswap_same_filled_pages);
18312b281117SSeth Jennings 
18322b281117SSeth Jennings 	return 0;
18332b281117SSeth Jennings }
18342b281117SSeth Jennings #else
1835141fdeecSLiu Shixin static int zswap_debugfs_init(void)
18362b281117SSeth Jennings {
18372b281117SSeth Jennings 	return 0;
18382b281117SSeth Jennings }
18392b281117SSeth Jennings #endif
18402b281117SSeth Jennings 
18412b281117SSeth Jennings /*********************************
18422b281117SSeth Jennings * module init and exit
18432b281117SSeth Jennings **********************************/
1844141fdeecSLiu Shixin static int zswap_setup(void)
18452b281117SSeth Jennings {
1846f1c54846SDan Streetman 	struct zswap_pool *pool;
1847ad7ed770SSebastian Andrzej Siewior 	int ret;
184860105e12SMinchan Kim 
1849b7919122SLiu Shixin 	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
1850b7919122SLiu Shixin 	if (!zswap_entry_cache) {
18512b281117SSeth Jennings 		pr_err("entry cache creation failed\n");
1852f1c54846SDan Streetman 		goto cache_fail;
18532b281117SSeth Jennings 	}
1854f1c54846SDan Streetman 
1855cab7a7e5SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
1856cab7a7e5SSebastian Andrzej Siewior 				      "mm/zswap_pool:prepare",
1857cab7a7e5SSebastian Andrzej Siewior 				      zswap_cpu_comp_prepare,
1858cab7a7e5SSebastian Andrzej Siewior 				      zswap_cpu_comp_dead);
1859cab7a7e5SSebastian Andrzej Siewior 	if (ret)
1860cab7a7e5SSebastian Andrzej Siewior 		goto hp_fail;
1861cab7a7e5SSebastian Andrzej Siewior 
1862f1c54846SDan Streetman 	pool = __zswap_pool_create_fallback();
1863ae3d89a7SDan Streetman 	if (pool) {
1864f1c54846SDan Streetman 		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
1865b8cf32dcSYosry Ahmed 			zpool_get_type(pool->zpools[0]));
1866f1c54846SDan Streetman 		list_add(&pool->list, &zswap_pools);
1867ae3d89a7SDan Streetman 		zswap_has_pool = true;
1868ae3d89a7SDan Streetman 	} else {
1869ae3d89a7SDan Streetman 		pr_err("pool creation failed\n");
1870ae3d89a7SDan Streetman 		zswap_enabled = false;
1871ae3d89a7SDan Streetman 	}
187260105e12SMinchan Kim 
18738409a385SRonald Monthero 	shrink_wq = alloc_workqueue("zswap-shrink",
18748409a385SRonald Monthero 			WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
187545190f01SVitaly Wool 	if (!shrink_wq)
187645190f01SVitaly Wool 		goto fallback_fail;
187745190f01SVitaly Wool 
18782b281117SSeth Jennings 	if (zswap_debugfs_init())
18792b281117SSeth Jennings 		pr_warn("debugfs initialization failed\n");
18809021ccecSLiu Shixin 	zswap_init_state = ZSWAP_INIT_SUCCEED;
18812b281117SSeth Jennings 	return 0;
1882f1c54846SDan Streetman 
188345190f01SVitaly Wool fallback_fail:
188438aeb071SDan Carpenter 	if (pool)
188545190f01SVitaly Wool 		zswap_pool_destroy(pool);
1886cab7a7e5SSebastian Andrzej Siewior hp_fail:
1887b7919122SLiu Shixin 	kmem_cache_destroy(zswap_entry_cache);
1888f1c54846SDan Streetman cache_fail:
1889d7b028f5SDan Streetman 	/* if built-in, we aren't unloaded on failure; don't allow use */
18909021ccecSLiu Shixin 	zswap_init_state = ZSWAP_INIT_FAILED;
1891d7b028f5SDan Streetman 	zswap_enabled = false;
18922b281117SSeth Jennings 	return -ENOMEM;
18932b281117SSeth Jennings }
1894141fdeecSLiu Shixin 
1895141fdeecSLiu Shixin static int __init zswap_init(void)
1896141fdeecSLiu Shixin {
1897141fdeecSLiu Shixin 	if (!zswap_enabled)
1898141fdeecSLiu Shixin 		return 0;
1899141fdeecSLiu Shixin 	return zswap_setup();
1900141fdeecSLiu Shixin }
19012b281117SSeth Jennings /* must be late so crypto has time to come up */
1902141fdeecSLiu Shixin late_initcall(zswap_init);
19032b281117SSeth Jennings 
190468386da8SSeth Jennings MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
19052b281117SSeth Jennings MODULE_DESCRIPTION("Compressed cache for swap pages");
1906