1c942fddfSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 22b281117SSeth Jennings /* 32b281117SSeth Jennings * zswap.c - zswap driver file 42b281117SSeth Jennings * 542c06a0eSJohannes Weiner * zswap is a cache that takes pages that are in the process 62b281117SSeth Jennings * of being swapped out and attempts to compress and store them in a 72b281117SSeth Jennings * RAM-based memory pool. This can result in a significant I/O reduction on 82b281117SSeth Jennings * the swap device and, in the case where decompressing from RAM is faster 92b281117SSeth Jennings * than reading from the swap device, can also improve workload performance. 102b281117SSeth Jennings * 112b281117SSeth Jennings * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> 122b281117SSeth Jennings */ 132b281117SSeth Jennings 142b281117SSeth Jennings #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 152b281117SSeth Jennings 162b281117SSeth Jennings #include <linux/module.h> 172b281117SSeth Jennings #include <linux/cpu.h> 182b281117SSeth Jennings #include <linux/highmem.h> 192b281117SSeth Jennings #include <linux/slab.h> 202b281117SSeth Jennings #include <linux/spinlock.h> 212b281117SSeth Jennings #include <linux/types.h> 222b281117SSeth Jennings #include <linux/atomic.h> 232b281117SSeth Jennings #include <linux/rbtree.h> 242b281117SSeth Jennings #include <linux/swap.h> 252b281117SSeth Jennings #include <linux/crypto.h> 261ec3b5feSBarry Song #include <linux/scatterlist.h> 27ddc1a5cbSHugh Dickins #include <linux/mempolicy.h> 282b281117SSeth Jennings #include <linux/mempool.h> 2912d79d64SDan Streetman #include <linux/zpool.h> 301ec3b5feSBarry Song #include <crypto/acompress.h> 3142c06a0eSJohannes Weiner #include <linux/zswap.h> 322b281117SSeth Jennings #include <linux/mm_types.h> 332b281117SSeth Jennings #include <linux/page-flags.h> 342b281117SSeth Jennings #include <linux/swapops.h> 352b281117SSeth Jennings #include <linux/writeback.h> 362b281117SSeth Jennings #include <linux/pagemap.h> 
#include <linux/workqueue.h>
#include <linux/list_lru.h>

#include "swap.h"
#include "internal.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be a 100% accurate. However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Store failed due to compression algorithm failure */
static u64 zswap_reject_compress_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

/* Sentinel for a zpool/compressor module param that could not be resolved */
#define ZSWAP_PARAM_UNSET ""

/* Lazy one-time initialization; forward declaration for the param setters */
static int zswap_setup(void);

/* Enable/disable zswap */
/* Enable/disable zswap (toggled at runtime via the "enabled" module param) */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32

/* Enable/disable memory pressure-based shrinker. */
static bool zswap_shrinker_enabled = IS_ENABLED(
		CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);

/* Report whether the zswap "enabled" module parameter is currently set. */
bool is_zswap_enabled(void)
{
	return zswap_enabled;
}

/*********************************
* data structures
**********************************/

/*
 * Compression context, allocated per-CPU (see the __percpu member of
 * struct zswap_pool below).
 */
struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;	/* async compression transform */
	struct acomp_req *req;		/* request object for @acomp */
	struct crypto_wait wait;	/* completion wait for async ops */
	/* NOTE(review): buffer/mutex are initialized elsewhere (cpuhp
	 * callback not visible in this chunk) — presumably a scratch page
	 * and a lock serializing use of this context; confirm there. */
	u8 *buffer;
	struct mutex mutex;
};

/*
 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
 * The only case where lru_lock is not acquired while holding tree.lock is
 * when a zswap_entry is taken off the lru for writeback, in that case it
 * needs to be verified that it's still valid in the tree.
 */
struct zswap_pool {
	struct zpool *zpools[ZSWAP_NR_ZPOOLS];	/* backing allocators */
	struct crypto_acomp_ctx __percpu *acomp_ctx; /* per-CPU compression state */
	struct percpu_ref ref;		/* lifetime refcount; release -> __zswap_pool_empty */
	struct list_head list;		/* membership on the zswap_pools RCU list */
	struct work_struct release_work; /* deferred teardown (__zswap_pool_release) */
	struct hlist_node node;		/* cpuhp instance (CPUHP_MM_ZSWP_POOL_PREPARE) */
	char tfm_name[CRYPTO_MAX_ALG_NAME]; /* compressor name this pool uses */
};

/* Global LRU lists shared by all zswap pools. */
static struct list_lru zswap_list_lru;
/* counter of pages stored in all zswap pools. */
static atomic_t zswap_nr_stored = ATOMIC_INIT(0);

/* The lock protects zswap_next_shrink updates. */
static DEFINE_SPINLOCK(zswap_shrink_lock);
static struct mem_cgroup *zswap_next_shrink;
static struct work_struct zswap_shrink_work;
static struct shrinker *zswap_shrinker;

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * swpentry - associated swap entry, the offset indexes into the red-black tree
 * length - the length in bytes of the compressed page data. Needed during
 *          decompression. For a same value filled page length is 0, and both
 *          pool and lru are invalid and must be ignored.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - value of the same-value filled pages which have same content
 * objcg - the obj_cgroup that the compressed memory is charged to
 * lru - handle to the pool's lru used to evict pages.
 */
struct zswap_entry {
	struct rb_node rbnode;
	swp_entry_t swpentry;
	unsigned int length;
	struct zswap_pool *pool;
	union {			/* length == 0 selects 'value', else 'handle' */
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
	struct list_head lru;
};

/* One red-black tree of entries plus the lock protecting it. */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

/* Per-swap-type arrays of trees; see swap_zswap_tree() for the indexing. */
static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
static unsigned int nr_zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);
/* Tracks whether zswap_setup() has run, and whether it succeeded. */
enum zswap_init_type {
	ZSWAP_UNINIT,
	ZSWAP_INIT_SUCCEED,
	ZSWAP_INIT_FAILED
};

static enum zswap_init_type zswap_init_state;

/* used to ensure the integrity of initialization */
static DEFINE_MUTEX(zswap_init_lock);

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

/*
 * Map a swap entry to the zswap_tree covering it. Each swap type is split
 * into multiple trees (see nr_zswap_trees), indexed by the entry offset in
 * SWAP_ADDRESS_SPACE_SHIFT-sized chunks.
 */
static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
{
	return &zswap_trees[swp_type(swp)][swp_offset(swp)
		>> SWAP_ADDRESS_SPACE_SHIFT];
}

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpools[0]))

/* True once the compressed pool exceeds zswap_max_pool_percent of RAM. */
static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

/*
 * Hysteresis counterpart of zswap_is_full(): after the limit was hit, new
 * pages are accepted again only once usage drops below accept_thr_percent
 * of the maximum pool size.
 */
static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

/* Total bytes used across all zpools backing @pool. */
static u64 get_zswap_pool_size(struct zswap_pool *pool)
{
	u64 pool_size = 0;
	int i;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		pool_size += zpool_get_total_size(pool->zpools[i]);

	return pool_size;
}

/* Recompute the global zswap_pool_total_size over all pools (RCU walk). */
static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += get_zswap_pool_size(pool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* pool functions
**********************************/
static void __zswap_pool_empty(struct percpu_ref *ref);

/*
 * Create a pool backed by the given zpool @type and @compressor name.
 * Allocates ZSWAP_NR_ZPOOLS zpools, the per-CPU compression contexts, a
 * cpuhp instance and the lifetime percpu_ref. Returns NULL on any failure.
 */
static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	int i;
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either are unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
		/* unique name for each pool specifically required by zsmalloc */
		snprintf(name, 38, "zswap%x",
			 atomic_inc_return(&zswap_pools_count));

		pool->zpools[i] = zpool_create_pool(type, name, gfp);
		if (!pool->zpools[i]) {
			pr_err("%s zpool not available\n", type);
			goto error;
		}
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	ret = percpu_ref_init(&pool->ref, __zswap_pool_empty,
			      PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
	if (ret)
		goto ref_fail;
	INIT_LIST_HEAD(&pool->list);

	zswap_pool_debug("created", pool);

	return pool;

ref_fail:
	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	/* i indexes the first zpool that failed (or NR on later failures) */
	while (i--)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
	return NULL;
}

/*
 * Create a pool from the configured module params, falling back to the
 * compile-time defaults — and finally marking the param ZSWAP_PARAM_UNSET —
 * when the requested compressor or zpool is unavailable.
 */
static struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

/* Final teardown: unregister the cpuhp instance and free all resources. */
static void zswap_pool_destroy(struct zswap_pool *pool)
{
	int i;

	zswap_pool_debug("destroying", pool);

	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
}

/*
 * Deferred release, run from a workqueue: wait out RCU readers that may
 * still see the pool on zswap_pools, then destroy it.
 */
static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a ref... */
	WARN_ON(!percpu_ref_is_zero(&pool->ref));
	percpu_ref_exit(&pool->ref);

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static struct zswap_pool *zswap_pool_current(void);

/*
 * percpu_ref release callback (registered in zswap_pool_create): the last
 * reference is gone, so unlink the pool from zswap_pools and schedule its
 * destruction (destruction itself may sleep, hence the work item).
 */
static void __zswap_pool_empty(struct percpu_ref *ref)
{
	struct zswap_pool *pool;

	pool = container_of(ref, typeof(*pool), ref);

	spin_lock_bh(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock_bh(&zswap_pools_lock);
}

/* Try to take a reference; returns 0 for NULL or a pool already dying. */
static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return percpu_ref_tryget(&pool->ref);
}

/* Drop a reference previously obtained via one of the *_get() helpers. */
static void zswap_pool_put(struct zswap_pool *pool)
{
	percpu_ref_put(&pool->ref);
}

/*
 * First pool on zswap_pools, i.e. the current one. Caller must hold
 * rcu_read_lock() or zswap_pools_lock (see the two wrappers below).
 */
static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

/* Current pool; only valid while zswap_pools_lock is held. */
static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

/* Lock-free variant: returns the current pool with a reference, or NULL. */
static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		/* all zpools share the same type */
		if (strcmp(zpool_get_type(pool->zpools[0]), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

/*********************************
* param callbacks
**********************************/

/* True if setting the param to @s would change the current pool config. */
static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
{
	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return false;
	return true;
}

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const
struct kernel_param *kp, 557abca07c0SJohannes Weiner char *type, char *compressor) 558abca07c0SJohannes Weiner { 559abca07c0SJohannes Weiner struct zswap_pool *pool, *put_pool = NULL; 560abca07c0SJohannes Weiner char *s = strstrip((char *)val); 561abca07c0SJohannes Weiner int ret = 0; 562abca07c0SJohannes Weiner bool new_pool = false; 563abca07c0SJohannes Weiner 564abca07c0SJohannes Weiner mutex_lock(&zswap_init_lock); 565abca07c0SJohannes Weiner switch (zswap_init_state) { 566abca07c0SJohannes Weiner case ZSWAP_UNINIT: 567abca07c0SJohannes Weiner /* if this is load-time (pre-init) param setting, 568abca07c0SJohannes Weiner * don't create a pool; that's done during init. 569abca07c0SJohannes Weiner */ 570abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 571abca07c0SJohannes Weiner break; 572abca07c0SJohannes Weiner case ZSWAP_INIT_SUCCEED: 573abca07c0SJohannes Weiner new_pool = zswap_pool_changed(s, kp); 574abca07c0SJohannes Weiner break; 575abca07c0SJohannes Weiner case ZSWAP_INIT_FAILED: 576abca07c0SJohannes Weiner pr_err("can't set param, initialization failed\n"); 577abca07c0SJohannes Weiner ret = -ENODEV; 578abca07c0SJohannes Weiner } 579abca07c0SJohannes Weiner mutex_unlock(&zswap_init_lock); 580abca07c0SJohannes Weiner 581abca07c0SJohannes Weiner /* no need to create a new pool, return directly */ 582abca07c0SJohannes Weiner if (!new_pool) 583abca07c0SJohannes Weiner return ret; 584abca07c0SJohannes Weiner 585abca07c0SJohannes Weiner if (!type) { 586abca07c0SJohannes Weiner if (!zpool_has_pool(s)) { 587abca07c0SJohannes Weiner pr_err("zpool %s not available\n", s); 588abca07c0SJohannes Weiner return -ENOENT; 589abca07c0SJohannes Weiner } 590abca07c0SJohannes Weiner type = s; 591abca07c0SJohannes Weiner } else if (!compressor) { 592abca07c0SJohannes Weiner if (!crypto_has_acomp(s, 0, 0)) { 593abca07c0SJohannes Weiner pr_err("compressor %s not available\n", s); 594abca07c0SJohannes Weiner return -ENOENT; 595abca07c0SJohannes Weiner } 596abca07c0SJohannes 
Weiner compressor = s; 597abca07c0SJohannes Weiner } else { 598abca07c0SJohannes Weiner WARN_ON(1); 599abca07c0SJohannes Weiner return -EINVAL; 600abca07c0SJohannes Weiner } 601abca07c0SJohannes Weiner 60294ace3feSChengming Zhou spin_lock_bh(&zswap_pools_lock); 603abca07c0SJohannes Weiner 604abca07c0SJohannes Weiner pool = zswap_pool_find_get(type, compressor); 605abca07c0SJohannes Weiner if (pool) { 606abca07c0SJohannes Weiner zswap_pool_debug("using existing", pool); 607abca07c0SJohannes Weiner WARN_ON(pool == zswap_pool_current()); 608abca07c0SJohannes Weiner list_del_rcu(&pool->list); 609abca07c0SJohannes Weiner } 610abca07c0SJohannes Weiner 61194ace3feSChengming Zhou spin_unlock_bh(&zswap_pools_lock); 612abca07c0SJohannes Weiner 613abca07c0SJohannes Weiner if (!pool) 614abca07c0SJohannes Weiner pool = zswap_pool_create(type, compressor); 61594ace3feSChengming Zhou else { 61694ace3feSChengming Zhou /* 61794ace3feSChengming Zhou * Restore the initial ref dropped by percpu_ref_kill() 61894ace3feSChengming Zhou * when the pool was decommissioned and switch it again 61994ace3feSChengming Zhou * to percpu mode. 62094ace3feSChengming Zhou */ 62194ace3feSChengming Zhou percpu_ref_resurrect(&pool->ref); 62294ace3feSChengming Zhou 62394ace3feSChengming Zhou /* Drop the ref from zswap_pool_find_get(). 
*/ 62494ace3feSChengming Zhou zswap_pool_put(pool); 62594ace3feSChengming Zhou } 626abca07c0SJohannes Weiner 627abca07c0SJohannes Weiner if (pool) 628abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 629abca07c0SJohannes Weiner else 630abca07c0SJohannes Weiner ret = -EINVAL; 631abca07c0SJohannes Weiner 63294ace3feSChengming Zhou spin_lock_bh(&zswap_pools_lock); 633abca07c0SJohannes Weiner 634abca07c0SJohannes Weiner if (!ret) { 635abca07c0SJohannes Weiner put_pool = zswap_pool_current(); 636abca07c0SJohannes Weiner list_add_rcu(&pool->list, &zswap_pools); 637abca07c0SJohannes Weiner zswap_has_pool = true; 638abca07c0SJohannes Weiner } else if (pool) { 639abca07c0SJohannes Weiner /* add the possibly pre-existing pool to the end of the pools 640abca07c0SJohannes Weiner * list; if it's new (and empty) then it'll be removed and 641abca07c0SJohannes Weiner * destroyed by the put after we drop the lock 642abca07c0SJohannes Weiner */ 643abca07c0SJohannes Weiner list_add_tail_rcu(&pool->list, &zswap_pools); 644abca07c0SJohannes Weiner put_pool = pool; 645abca07c0SJohannes Weiner } 646abca07c0SJohannes Weiner 64794ace3feSChengming Zhou spin_unlock_bh(&zswap_pools_lock); 648abca07c0SJohannes Weiner 649abca07c0SJohannes Weiner if (!zswap_has_pool && !pool) { 650abca07c0SJohannes Weiner /* if initial pool creation failed, and this pool creation also 651abca07c0SJohannes Weiner * failed, maybe both compressor and zpool params were bad. 652abca07c0SJohannes Weiner * Allow changing this param, so pool creation will succeed 653abca07c0SJohannes Weiner * when the other param is changed. We already verified this 654abca07c0SJohannes Weiner * param is ok in the zpool_has_pool() or crypto_has_acomp() 655abca07c0SJohannes Weiner * checks above. 
656abca07c0SJohannes Weiner */ 657abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 658abca07c0SJohannes Weiner } 659abca07c0SJohannes Weiner 660abca07c0SJohannes Weiner /* drop the ref from either the old current pool, 661abca07c0SJohannes Weiner * or the new pool we failed to add 662abca07c0SJohannes Weiner */ 663abca07c0SJohannes Weiner if (put_pool) 66494ace3feSChengming Zhou percpu_ref_kill(&put_pool->ref); 665abca07c0SJohannes Weiner 666abca07c0SJohannes Weiner return ret; 667abca07c0SJohannes Weiner } 668abca07c0SJohannes Weiner 669abca07c0SJohannes Weiner static int zswap_compressor_param_set(const char *val, 670abca07c0SJohannes Weiner const struct kernel_param *kp) 671abca07c0SJohannes Weiner { 672abca07c0SJohannes Weiner return __zswap_param_set(val, kp, zswap_zpool_type, NULL); 673abca07c0SJohannes Weiner } 674abca07c0SJohannes Weiner 675abca07c0SJohannes Weiner static int zswap_zpool_param_set(const char *val, 676abca07c0SJohannes Weiner const struct kernel_param *kp) 677abca07c0SJohannes Weiner { 678abca07c0SJohannes Weiner return __zswap_param_set(val, kp, NULL, zswap_compressor); 679abca07c0SJohannes Weiner } 680abca07c0SJohannes Weiner 681abca07c0SJohannes Weiner static int zswap_enabled_param_set(const char *val, 682abca07c0SJohannes Weiner const struct kernel_param *kp) 683abca07c0SJohannes Weiner { 684abca07c0SJohannes Weiner int ret = -ENODEV; 685abca07c0SJohannes Weiner 686abca07c0SJohannes Weiner /* if this is load-time (pre-init) param setting, only set param. 
	 */
	if (system_state != SYSTEM_RUNNING)
		return param_set_bool(val, kp);

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		if (zswap_setup())
			break;
		fallthrough;
	case ZSWAP_INIT_SUCCEED:
		if (!zswap_has_pool)
			pr_err("can't enable, no pool configured\n");
		else
			ret = param_set_bool(val, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't enable, initialization failed\n");
	}
	mutex_unlock(&zswap_init_lock);

	return ret;
}

/*********************************
* lru functions
**********************************/

/* should be called under RCU */
#ifdef CONFIG_MEMCG
/* Map an entry to the memcg of its obj_cgroup, or NULL if uncharged. */
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{
	return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
}
#else
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{
	return NULL;
}
#endif

/* NUMA node backing the slab page this entry was allocated from. */
static inline int entry_to_nid(struct zswap_entry *entry)
{
	return page_to_nid(virt_to_page(entry));
}

/*
 * Add @entry to the per-memcg, per-node zswap LRU and bump the lruvec's
 * nr_zswap_protected counter (with decay) for the shrinker heuristic.
 */
static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
{
	atomic_long_t *nr_zswap_protected;
	unsigned long lru_size, old, new;
	int nid = entry_to_nid(entry);
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;

	/*
	 * Note that it is safe to use rcu_read_lock() here, even in the face of
	 * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection
	 * used in list_lru lookup, only two scenarios are possible:
	 *
	 * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The
	 *    new entry will be reparented to memcg's parent's list_lru.
	 * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The
	 *    new entry will be added directly to memcg's parent's list_lru.
	 *
	 * Similar reasoning holds for list_lru_del().
	 */
	rcu_read_lock();
	memcg = mem_cgroup_from_entry(entry);
	/* will always succeed */
	list_lru_add(list_lru, &entry->lru, nid, memcg);

	/* Update the protection area */
	lru_size = list_lru_count_one(list_lru, nid, memcg);
	lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
	nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected;
	old = atomic_long_inc_return(nr_zswap_protected);
	/*
	 * Decay to avoid overflow and adapt to changing workloads.
	 * This is based on LRU reclaim cost decaying heuristics.
	 */
	do {
		new = old > lru_size / 4 ? old / 2 : old;
	} while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new));
	rcu_read_unlock();
}

/* Remove @entry from the per-memcg, per-node zswap LRU. */
static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
{
	int nid = entry_to_nid(entry);
	struct mem_cgroup *memcg;

	rcu_read_lock();
	memcg = mem_cgroup_from_entry(entry);
	/* will always succeed */
	list_lru_del(list_lru, &entry->lru, nid, memcg);
	rcu_read_unlock();
}

void zswap_lruvec_state_init(struct lruvec *lruvec)
{
atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0); 7875182661aSJohannes Weiner } 7885182661aSJohannes Weiner 7895182661aSJohannes Weiner void zswap_folio_swapin(struct folio *folio) 7905182661aSJohannes Weiner { 7915182661aSJohannes Weiner struct lruvec *lruvec; 7925182661aSJohannes Weiner 7935182661aSJohannes Weiner if (folio) { 7945182661aSJohannes Weiner lruvec = folio_lruvec(folio); 7955182661aSJohannes Weiner atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); 7965182661aSJohannes Weiner } 7975182661aSJohannes Weiner } 7985182661aSJohannes Weiner 7995182661aSJohannes Weiner void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) 8005182661aSJohannes Weiner { 801bf9b7df2SChengming Zhou /* lock out zswap shrinker walking memcg tree */ 802*e35606e4SChengming Zhou spin_lock(&zswap_shrink_lock); 803*e35606e4SChengming Zhou if (zswap_next_shrink == memcg) 804*e35606e4SChengming Zhou zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL); 805*e35606e4SChengming Zhou spin_unlock(&zswap_shrink_lock); 8065182661aSJohannes Weiner } 8075182661aSJohannes Weiner 8085182661aSJohannes Weiner /********************************* 8092b281117SSeth Jennings * rbtree functions 8102b281117SSeth Jennings **********************************/ 8112b281117SSeth Jennings static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset) 8122b281117SSeth Jennings { 8132b281117SSeth Jennings struct rb_node *node = root->rb_node; 8142b281117SSeth Jennings struct zswap_entry *entry; 8150bb48849SDomenico Cerasuolo pgoff_t entry_offset; 8162b281117SSeth Jennings 8172b281117SSeth Jennings while (node) { 8182b281117SSeth Jennings entry = rb_entry(node, struct zswap_entry, rbnode); 8190bb48849SDomenico Cerasuolo entry_offset = swp_offset(entry->swpentry); 8200bb48849SDomenico Cerasuolo if (entry_offset > offset) 8212b281117SSeth Jennings node = node->rb_left; 8220bb48849SDomenico Cerasuolo else if (entry_offset < offset) 8232b281117SSeth 
Jennings node = node->rb_right; 8242b281117SSeth Jennings else 8252b281117SSeth Jennings return entry; 8262b281117SSeth Jennings } 8272b281117SSeth Jennings return NULL; 8282b281117SSeth Jennings } 8292b281117SSeth Jennings 8302b281117SSeth Jennings /* 8312b281117SSeth Jennings * In the case that a entry with the same offset is found, a pointer to 8322b281117SSeth Jennings * the existing entry is stored in dupentry and the function returns -EEXIST 8332b281117SSeth Jennings */ 8342b281117SSeth Jennings static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry, 8352b281117SSeth Jennings struct zswap_entry **dupentry) 8362b281117SSeth Jennings { 8372b281117SSeth Jennings struct rb_node **link = &root->rb_node, *parent = NULL; 8382b281117SSeth Jennings struct zswap_entry *myentry; 8390bb48849SDomenico Cerasuolo pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry); 8402b281117SSeth Jennings 8412b281117SSeth Jennings while (*link) { 8422b281117SSeth Jennings parent = *link; 8432b281117SSeth Jennings myentry = rb_entry(parent, struct zswap_entry, rbnode); 8440bb48849SDomenico Cerasuolo myentry_offset = swp_offset(myentry->swpentry); 8450bb48849SDomenico Cerasuolo if (myentry_offset > entry_offset) 8462b281117SSeth Jennings link = &(*link)->rb_left; 8470bb48849SDomenico Cerasuolo else if (myentry_offset < entry_offset) 8482b281117SSeth Jennings link = &(*link)->rb_right; 8492b281117SSeth Jennings else { 8502b281117SSeth Jennings *dupentry = myentry; 8512b281117SSeth Jennings return -EEXIST; 8522b281117SSeth Jennings } 8532b281117SSeth Jennings } 8542b281117SSeth Jennings rb_link_node(&entry->rbnode, parent, link); 8552b281117SSeth Jennings rb_insert_color(&entry->rbnode, root); 8562b281117SSeth Jennings return 0; 8572b281117SSeth Jennings } 8582b281117SSeth Jennings 859a230c20eSChengming Zhou static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) 8600ab0abcfSWeijie Yang { 8610ab0abcfSWeijie Yang rb_erase(&entry->rbnode, 
root); 8620ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 8630ab0abcfSWeijie Yang } 8640ab0abcfSWeijie Yang 86536034bf6SJohannes Weiner /********************************* 86636034bf6SJohannes Weiner * zswap entry functions 86736034bf6SJohannes Weiner **********************************/ 86836034bf6SJohannes Weiner static struct kmem_cache *zswap_entry_cache; 86936034bf6SJohannes Weiner 87036034bf6SJohannes Weiner static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid) 87136034bf6SJohannes Weiner { 87236034bf6SJohannes Weiner struct zswap_entry *entry; 87336034bf6SJohannes Weiner entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid); 87436034bf6SJohannes Weiner if (!entry) 87536034bf6SJohannes Weiner return NULL; 87636034bf6SJohannes Weiner RB_CLEAR_NODE(&entry->rbnode); 87736034bf6SJohannes Weiner return entry; 87836034bf6SJohannes Weiner } 87936034bf6SJohannes Weiner 88036034bf6SJohannes Weiner static void zswap_entry_cache_free(struct zswap_entry *entry) 88136034bf6SJohannes Weiner { 88236034bf6SJohannes Weiner kmem_cache_free(zswap_entry_cache, entry); 88336034bf6SJohannes Weiner } 88436034bf6SJohannes Weiner 885b8cf32dcSYosry Ahmed static struct zpool *zswap_find_zpool(struct zswap_entry *entry) 886b8cf32dcSYosry Ahmed { 887b8cf32dcSYosry Ahmed int i = 0; 888b8cf32dcSYosry Ahmed 889b8cf32dcSYosry Ahmed if (ZSWAP_NR_ZPOOLS > 1) 890b8cf32dcSYosry Ahmed i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS)); 891b8cf32dcSYosry Ahmed 892b8cf32dcSYosry Ahmed return entry->pool->zpools[i]; 893b8cf32dcSYosry Ahmed } 894b8cf32dcSYosry Ahmed 8950ab0abcfSWeijie Yang /* 89612d79d64SDan Streetman * Carries out the common pattern of freeing and entry's zpool allocation, 8970ab0abcfSWeijie Yang * freeing the entry itself, and decrementing the number of stored pages. 
8980ab0abcfSWeijie Yang */ 89942398be2SJohannes Weiner static void zswap_entry_free(struct zswap_entry *entry) 9000ab0abcfSWeijie Yang { 901a85f878bSSrividya Desireddy if (!entry->length) 902a85f878bSSrividya Desireddy atomic_dec(&zswap_same_filled_pages); 903a85f878bSSrividya Desireddy else { 904*e35606e4SChengming Zhou zswap_lru_del(&zswap_list_lru, entry); 905b8cf32dcSYosry Ahmed zpool_free(zswap_find_zpool(entry), entry->handle); 906*e35606e4SChengming Zhou atomic_dec(&zswap_nr_stored); 907f1c54846SDan Streetman zswap_pool_put(entry->pool); 908a85f878bSSrividya Desireddy } 9092e601e1eSJohannes Weiner if (entry->objcg) { 9102e601e1eSJohannes Weiner obj_cgroup_uncharge_zswap(entry->objcg, entry->length); 9112e601e1eSJohannes Weiner obj_cgroup_put(entry->objcg); 9122e601e1eSJohannes Weiner } 9130ab0abcfSWeijie Yang zswap_entry_cache_free(entry); 9140ab0abcfSWeijie Yang atomic_dec(&zswap_stored_pages); 915f1c54846SDan Streetman zswap_update_total_size(); 9160ab0abcfSWeijie Yang } 9170ab0abcfSWeijie Yang 9187dd1f7f0SJohannes Weiner /* 919a230c20eSChengming Zhou * The caller hold the tree lock and search the entry from the tree, 920a230c20eSChengming Zhou * so it must be on the tree, remove it from the tree and free it. 
 */
static void zswap_invalidate_entry(struct zswap_tree *tree,
				   struct zswap_entry *entry)
{
	zswap_rb_erase(&tree->rbroot, entry);
	zswap_entry_free(entry);
}

/*********************************
* compressed storage functions
**********************************/
/*
 * CPU hotplug "prepare" callback: set up this CPU's per-cpu compression
 * context for @pool — a 2-page bounce buffer, an acomp transform, and a
 * pre-allocated request wired to a synchronous completion wait.
 * Returns 0 or a negative errno; partial allocations are unwound.
 */
static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	int ret;

	mutex_init(&acomp_ctx->mutex);

	/* 2 pages: compression output may exceed PAGE_SIZE (see zswap_compress) */
	acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!acomp_ctx->buffer)
		return -ENOMEM;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
		       pool->tfm_name, PTR_ERR(acomp));
		ret = PTR_ERR(acomp);
		goto acomp_fail;
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		ret = -ENOMEM;
		goto req_fail;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
	 * won't be called, crypto_wait_req() will return without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	return 0;

req_fail:
	crypto_free_acomp(acomp_ctx->acomp);
acomp_fail:
	kfree(acomp_ctx->buffer);
	return ret;
}

/*
 * CPU hotplug "dead" callback: tear down this CPU's compression context
 * for @pool, tolerating partially-initialized state.
 */
static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
		kfree(acomp_ctx->buffer);
	}

	return 0;
}

/*
 * Compress @folio's page into a fresh zpool allocation recorded in
 * @entry (handle + length).  Returns true on success; on failure bumps
 * the matching reject counter (poor compression, compressor error, or
 * allocation failure) and returns false.
 */
static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
{
	struct crypto_acomp_ctx *acomp_ctx;
	struct scatterlist input, output;
	int comp_ret = 0, alloc_ret = 0;
	unsigned int dlen = PAGE_SIZE;
	unsigned long handle;
	struct zpool *zpool;
	char *buf;
	gfp_t gfp;
	u8 *dst;

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(&acomp_ctx->mutex);

	dst = acomp_ctx->buffer;
	sg_init_table(&input, 1);
	sg_set_page(&input, &folio->page, PAGE_SIZE, 0);

	/*
	 * We need PAGE_SIZE * 2 here since there maybe over-compression case,
	 * and hardware-accelerators may won't check the dst buffer size, so
	 * giving the dst buffer with enough length to avoid buffer overflow.
	 */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);

	/*
	 * it maybe looks a little bit silly that we send an asynchronous request,
	 * then wait for its completion synchronously. This makes the process look
	 * synchronous in fact.
	 * Theoretically, acomp supports users send multiple acomp requests in one
	 * acomp instance, then get those requests done simultaneously. but in this
	 * case, zswap actually does store and load page by page, there is no
	 * existing method to send the second page before the first page is done
	 * in one thread doing zwap.
	 * but in different threads running on different cpu, we have different
	 * acomp instance, so multiple threads can do (de)compression in parallel.
	 */
	comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;
	if (comp_ret)
		goto unlock;

	zpool = zswap_find_zpool(entry);
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	alloc_ret = zpool_malloc(zpool, dlen, gfp, &handle);
	if (alloc_ret)
		goto unlock;

	/* copy the compressed data into the zpool allocation */
	buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, dst, dlen);
	zpool_unmap_handle(zpool, handle);

	entry->handle = handle;
	entry->length = dlen;

unlock:
	/* -ENOSPC from either step means the page compressed poorly */
	if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC)
		zswap_reject_compress_poor++;
	else if (comp_ret)
		zswap_reject_compress_fail++;
	else if (alloc_ret)
		zswap_reject_alloc_fail++;

	mutex_unlock(&acomp_ctx->mutex);
	return comp_ret == 0 && alloc_ret == 0;
}

/*
 * Decompress @entry's zpool data into @page.  Decompression of data we
 * previously compressed ourselves must succeed, hence the BUG_ON()s.
 */
static void zswap_decompress(struct zswap_entry *entry, struct page *page)
{
	struct zpool *zpool = zswap_find_zpool(entry);
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src;

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(&acomp_ctx->mutex);

	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
	if (!zpool_can_sleep_mapped(zpool)) {
		/*
		 * The mapping can't be held across the (possibly sleeping)
		 * decompression, so bounce through the per-cpu buffer.
		 */
		memcpy(acomp_ctx->buffer, src, entry->length);
		src = acomp_ctx->buffer;
		zpool_unmap_handle(zpool, entry->handle);
	}

	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
	mutex_unlock(&acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(zpool))
		zpool_unmap_handle(zpool, entry->handle);
}

/*********************************
* writeback code
**********************************/
/*
 * Attempts to free an entry by adding a folio to the swap cache,
 * decompressing the entry data into the folio, and issuing a
 * bio write to write the folio back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the folio
 * to the swap device. We are basically resuming the same swap
 * writeback path that was intercepted with the zswap_store()
 * in the first place. After the folio has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zswap_entry *entry,
				 swp_entry_t swpentry)
{
	struct zswap_tree *tree;
	struct folio *folio;
	struct mempolicy *mpol;
	bool folio_was_allocated;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* try to allocate swap cache folio */
	mpol = get_task_policy(current);
	folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
				NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
	if (!folio)
		return -ENOMEM;

	/*
	 * Found an existing folio, we raced with swapin or concurrent
	 * shrinker. We generally writeback cold folios from zswap, and
	 * swapin means the folio just became hot, so skip this folio.
	 * For unlikely concurrent shrinker case, it will be unlinked
	 * and freed when invalidated by the concurrent shrinker anyway.
	 */
	if (!folio_was_allocated) {
		folio_put(folio);
		return -EEXIST;
	}

	/*
	 * folio is locked, and the swapcache is now secured against
	 * concurrent swapping to and from the slot, and concurrent
	 * swapoff so we can safely dereference the zswap tree here.
	 * Verify that the swap entry hasn't been invalidated and recycled
	 * behind our backs, to avoid overwriting a new swap folio with
	 * old compressed data. Only when this is successful can the entry
	 * be dereferenced.
	 */
	tree = swap_zswap_tree(swpentry);
	spin_lock(&tree->lock);
	if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
		/* entry was invalidated/recycled: undo the swapcache add */
		spin_unlock(&tree->lock);
		delete_from_swap_cache(folio);
		folio_unlock(folio);
		folio_put(folio);
		return -ENOMEM;
	}

	/* Safe to deref entry after the entry is verified above. */
	zswap_rb_erase(&tree->rbroot, entry);
	spin_unlock(&tree->lock);

	zswap_decompress(entry, &folio->page);

	count_vm_event(ZSWPWB);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPWB);

	zswap_entry_free(entry);

	/* folio is up to date */
	folio_mark_uptodate(folio);

	/* move it to the tail of the inactive list after end_writeback */
	folio_set_reclaim(folio);

	/* start writeback */
	__swap_writepage(folio, &wbc);
	folio_put(folio);

	return 0;
}

/*********************************
* shrinker functions
**********************************/
/*
 * list_lru walk callback: try to write one LRU entry back to the swap
 * device.  (Continues past this view.)
 */
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
				       spinlock_t *lock, void *arg)
{
	struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
	bool *encountered_page_in_swapcache = (bool *)arg;
	swp_entry_t swpentry;
	enum lru_status ret = LRU_REMOVED_RETRY;
	int writeback_result;

	/*
	 * As soon as we drop the LRU lock, the entry can be
freed by 1202f9c0f1c3SChengming Zhou * a concurrent invalidation. This means the following: 1203eb23ee4fSJohannes Weiner * 1204f9c0f1c3SChengming Zhou * 1. We extract the swp_entry_t to the stack, allowing 1205f9c0f1c3SChengming Zhou * zswap_writeback_entry() to pin the swap entry and 1206f9c0f1c3SChengming Zhou * then validate the zwap entry against that swap entry's 1207f9c0f1c3SChengming Zhou * tree using pointer value comparison. Only when that 1208f9c0f1c3SChengming Zhou * is successful can the entry be dereferenced. 1209f9c0f1c3SChengming Zhou * 1210f9c0f1c3SChengming Zhou * 2. Usually, objects are taken off the LRU for reclaim. In 1211f9c0f1c3SChengming Zhou * this case this isn't possible, because if reclaim fails 1212f9c0f1c3SChengming Zhou * for whatever reason, we have no means of knowing if the 1213f9c0f1c3SChengming Zhou * entry is alive to put it back on the LRU. 1214f9c0f1c3SChengming Zhou * 1215f9c0f1c3SChengming Zhou * So rotate it before dropping the lock. If the entry is 1216f9c0f1c3SChengming Zhou * written back or invalidated, the free path will unlink 1217f9c0f1c3SChengming Zhou * it. For failures, rotation is the right thing as well. 1218eb23ee4fSJohannes Weiner * 1219eb23ee4fSJohannes Weiner * Temporary failures, where the same entry should be tried 1220eb23ee4fSJohannes Weiner * again immediately, almost never happen for this shrinker. 1221eb23ee4fSJohannes Weiner * We don't do any trylocking; -ENOMEM comes closest, 1222eb23ee4fSJohannes Weiner * but that's extremely rare and doesn't happen spuriously 1223eb23ee4fSJohannes Weiner * either. Don't bother distinguishing this case. 1224eb23ee4fSJohannes Weiner */ 1225eb23ee4fSJohannes Weiner list_move_tail(item, &l->list); 1226eb23ee4fSJohannes Weiner 1227eb23ee4fSJohannes Weiner /* 1228eb23ee4fSJohannes Weiner * Once the lru lock is dropped, the entry might get freed. 
The 1229eb23ee4fSJohannes Weiner * swpentry is copied to the stack, and entry isn't deref'd again 1230eb23ee4fSJohannes Weiner * until the entry is verified to still be alive in the tree. 1231eb23ee4fSJohannes Weiner */ 1232eb23ee4fSJohannes Weiner swpentry = entry->swpentry; 1233eb23ee4fSJohannes Weiner 1234eb23ee4fSJohannes Weiner /* 1235eb23ee4fSJohannes Weiner * It's safe to drop the lock here because we return either 1236eb23ee4fSJohannes Weiner * LRU_REMOVED_RETRY or LRU_RETRY. 1237eb23ee4fSJohannes Weiner */ 1238eb23ee4fSJohannes Weiner spin_unlock(lock); 1239eb23ee4fSJohannes Weiner 1240eb23ee4fSJohannes Weiner writeback_result = zswap_writeback_entry(entry, swpentry); 1241eb23ee4fSJohannes Weiner 1242eb23ee4fSJohannes Weiner if (writeback_result) { 1243eb23ee4fSJohannes Weiner zswap_reject_reclaim_fail++; 1244eb23ee4fSJohannes Weiner ret = LRU_RETRY; 1245eb23ee4fSJohannes Weiner 1246eb23ee4fSJohannes Weiner /* 1247eb23ee4fSJohannes Weiner * Encountering a page already in swap cache is a sign that we are shrinking 1248eb23ee4fSJohannes Weiner * into the warmer region. We should terminate shrinking (if we're in the dynamic 1249eb23ee4fSJohannes Weiner * shrinker context). 
1250eb23ee4fSJohannes Weiner */ 1251b49547adSChengming Zhou if (writeback_result == -EEXIST && encountered_page_in_swapcache) { 1252b49547adSChengming Zhou ret = LRU_STOP; 1253eb23ee4fSJohannes Weiner *encountered_page_in_swapcache = true; 1254b49547adSChengming Zhou } 1255eb23ee4fSJohannes Weiner } else { 1256eb23ee4fSJohannes Weiner zswap_written_back_pages++; 1257eb23ee4fSJohannes Weiner } 1258eb23ee4fSJohannes Weiner 1259eb23ee4fSJohannes Weiner spin_lock(lock); 1260eb23ee4fSJohannes Weiner return ret; 1261eb23ee4fSJohannes Weiner } 1262b5ba474fSNhat Pham 1263b5ba474fSNhat Pham static unsigned long zswap_shrinker_scan(struct shrinker *shrinker, 1264b5ba474fSNhat Pham struct shrink_control *sc) 1265b5ba474fSNhat Pham { 1266b5ba474fSNhat Pham struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid)); 1267b5ba474fSNhat Pham unsigned long shrink_ret, nr_protected, lru_size; 1268b5ba474fSNhat Pham bool encountered_page_in_swapcache = false; 1269b5ba474fSNhat Pham 1270501a06feSNhat Pham if (!zswap_shrinker_enabled || 1271501a06feSNhat Pham !mem_cgroup_zswap_writeback_enabled(sc->memcg)) { 1272b5ba474fSNhat Pham sc->nr_scanned = 0; 1273b5ba474fSNhat Pham return SHRINK_STOP; 1274b5ba474fSNhat Pham } 1275b5ba474fSNhat Pham 1276b5ba474fSNhat Pham nr_protected = 1277b5ba474fSNhat Pham atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected); 1278*e35606e4SChengming Zhou lru_size = list_lru_shrink_count(&zswap_list_lru, sc); 1279b5ba474fSNhat Pham 1280b5ba474fSNhat Pham /* 1281b5ba474fSNhat Pham * Abort if we are shrinking into the protected region. 
1282b5ba474fSNhat Pham * 1283b5ba474fSNhat Pham * This short-circuiting is necessary because if we have too many multiple 1284b5ba474fSNhat Pham * concurrent reclaimers getting the freeable zswap object counts at the 1285b5ba474fSNhat Pham * same time (before any of them made reasonable progress), the total 1286b5ba474fSNhat Pham * number of reclaimed objects might be more than the number of unprotected 1287b5ba474fSNhat Pham * objects (i.e the reclaimers will reclaim into the protected area of the 1288b5ba474fSNhat Pham * zswap LRU). 1289b5ba474fSNhat Pham */ 1290b5ba474fSNhat Pham if (nr_protected >= lru_size - sc->nr_to_scan) { 1291b5ba474fSNhat Pham sc->nr_scanned = 0; 1292b5ba474fSNhat Pham return SHRINK_STOP; 1293b5ba474fSNhat Pham } 1294b5ba474fSNhat Pham 1295*e35606e4SChengming Zhou shrink_ret = list_lru_shrink_walk(&zswap_list_lru, sc, &shrink_memcg_cb, 1296b5ba474fSNhat Pham &encountered_page_in_swapcache); 1297b5ba474fSNhat Pham 1298b5ba474fSNhat Pham if (encountered_page_in_swapcache) 1299b5ba474fSNhat Pham return SHRINK_STOP; 1300b5ba474fSNhat Pham 1301b5ba474fSNhat Pham return shrink_ret ? 
shrink_ret : SHRINK_STOP; 1302b5ba474fSNhat Pham } 1303b5ba474fSNhat Pham 1304b5ba474fSNhat Pham static unsigned long zswap_shrinker_count(struct shrinker *shrinker, 1305b5ba474fSNhat Pham struct shrink_control *sc) 1306b5ba474fSNhat Pham { 1307b5ba474fSNhat Pham struct mem_cgroup *memcg = sc->memcg; 1308b5ba474fSNhat Pham struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid)); 1309b5ba474fSNhat Pham unsigned long nr_backing, nr_stored, nr_freeable, nr_protected; 1310b5ba474fSNhat Pham 1311501a06feSNhat Pham if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) 1312b5ba474fSNhat Pham return 0; 1313b5ba474fSNhat Pham 1314b5ba474fSNhat Pham #ifdef CONFIG_MEMCG_KMEM 13157d7ef0a4SYosry Ahmed mem_cgroup_flush_stats(memcg); 1316b5ba474fSNhat Pham nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; 1317b5ba474fSNhat Pham nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); 1318b5ba474fSNhat Pham #else 1319b5ba474fSNhat Pham /* use pool stats instead of memcg stats */ 1320bf9b7df2SChengming Zhou nr_backing = zswap_pool_total_size >> PAGE_SHIFT; 1321*e35606e4SChengming Zhou nr_stored = atomic_read(&zswap_nr_stored); 1322b5ba474fSNhat Pham #endif 1323b5ba474fSNhat Pham 1324b5ba474fSNhat Pham if (!nr_stored) 1325b5ba474fSNhat Pham return 0; 1326b5ba474fSNhat Pham 1327b5ba474fSNhat Pham nr_protected = 1328b5ba474fSNhat Pham atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected); 1329*e35606e4SChengming Zhou nr_freeable = list_lru_shrink_count(&zswap_list_lru, sc); 1330b5ba474fSNhat Pham /* 1331b5ba474fSNhat Pham * Subtract the lru size by an estimate of the number of pages 1332b5ba474fSNhat Pham * that should be protected. 1333b5ba474fSNhat Pham */ 1334b5ba474fSNhat Pham nr_freeable = nr_freeable > nr_protected ? nr_freeable - nr_protected : 0; 1335b5ba474fSNhat Pham 1336b5ba474fSNhat Pham /* 1337b5ba474fSNhat Pham * Scale the number of freeable pages by the memory saving factor. 
1338b5ba474fSNhat Pham * This ensures that the better zswap compresses memory, the fewer 1339b5ba474fSNhat Pham * pages we will evict to swap (as it will otherwise incur IO for 1340b5ba474fSNhat Pham * relatively small memory saving). 1341b5ba474fSNhat Pham */ 1342b5ba474fSNhat Pham return mult_frac(nr_freeable, nr_backing, nr_stored); 1343b5ba474fSNhat Pham } 1344b5ba474fSNhat Pham 1345bf9b7df2SChengming Zhou static struct shrinker *zswap_alloc_shrinker(void) 1346b5ba474fSNhat Pham { 1347bf9b7df2SChengming Zhou struct shrinker *shrinker; 1348b5ba474fSNhat Pham 1349bf9b7df2SChengming Zhou shrinker = 1350bf9b7df2SChengming Zhou shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap"); 1351bf9b7df2SChengming Zhou if (!shrinker) 1352bf9b7df2SChengming Zhou return NULL; 1353bf9b7df2SChengming Zhou 1354bf9b7df2SChengming Zhou shrinker->scan_objects = zswap_shrinker_scan; 1355bf9b7df2SChengming Zhou shrinker->count_objects = zswap_shrinker_count; 1356bf9b7df2SChengming Zhou shrinker->batch = 0; 1357bf9b7df2SChengming Zhou shrinker->seeks = DEFAULT_SEEKS; 1358bf9b7df2SChengming Zhou return shrinker; 1359b5ba474fSNhat Pham } 1360b5ba474fSNhat Pham 1361a65b0e76SDomenico Cerasuolo static int shrink_memcg(struct mem_cgroup *memcg) 1362a65b0e76SDomenico Cerasuolo { 1363a65b0e76SDomenico Cerasuolo int nid, shrunk = 0; 1364a65b0e76SDomenico Cerasuolo 1365501a06feSNhat Pham if (!mem_cgroup_zswap_writeback_enabled(memcg)) 1366501a06feSNhat Pham return -EINVAL; 1367501a06feSNhat Pham 1368a65b0e76SDomenico Cerasuolo /* 1369a65b0e76SDomenico Cerasuolo * Skip zombies because their LRUs are reparented and we would be 1370a65b0e76SDomenico Cerasuolo * reclaiming from the parent instead of the dead memcg. 
1371a65b0e76SDomenico Cerasuolo */ 1372a65b0e76SDomenico Cerasuolo if (memcg && !mem_cgroup_online(memcg)) 1373a65b0e76SDomenico Cerasuolo return -ENOENT; 1374a65b0e76SDomenico Cerasuolo 1375a65b0e76SDomenico Cerasuolo for_each_node_state(nid, N_NORMAL_MEMORY) { 1376a65b0e76SDomenico Cerasuolo unsigned long nr_to_walk = 1; 1377a65b0e76SDomenico Cerasuolo 1378*e35606e4SChengming Zhou shrunk += list_lru_walk_one(&zswap_list_lru, nid, memcg, 1379a65b0e76SDomenico Cerasuolo &shrink_memcg_cb, NULL, &nr_to_walk); 1380a65b0e76SDomenico Cerasuolo } 1381a65b0e76SDomenico Cerasuolo return shrunk ? 0 : -EAGAIN; 1382f999f38bSDomenico Cerasuolo } 1383f999f38bSDomenico Cerasuolo 138445190f01SVitaly Wool static void shrink_worker(struct work_struct *w) 138545190f01SVitaly Wool { 1386a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 1387e0228d59SDomenico Cerasuolo int ret, failures = 0; 138845190f01SVitaly Wool 1389a65b0e76SDomenico Cerasuolo /* global reclaim will select cgroup in a round-robin fashion. */ 1390e0228d59SDomenico Cerasuolo do { 1391*e35606e4SChengming Zhou spin_lock(&zswap_shrink_lock); 1392*e35606e4SChengming Zhou zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL); 1393*e35606e4SChengming Zhou memcg = zswap_next_shrink; 1394a65b0e76SDomenico Cerasuolo 1395a65b0e76SDomenico Cerasuolo /* 1396a65b0e76SDomenico Cerasuolo * We need to retry if we have gone through a full round trip, or if we 1397a65b0e76SDomenico Cerasuolo * got an offline memcg (or else we risk undoing the effect of the 1398a65b0e76SDomenico Cerasuolo * zswap memcg offlining cleanup callback). This is not catastrophic 1399a65b0e76SDomenico Cerasuolo * per se, but it will keep the now offlined memcg hostage for a while. 
1400a65b0e76SDomenico Cerasuolo * 1401a65b0e76SDomenico Cerasuolo * Note that if we got an online memcg, we will keep the extra 1402a65b0e76SDomenico Cerasuolo * reference in case the original reference obtained by mem_cgroup_iter 1403a65b0e76SDomenico Cerasuolo * is dropped by the zswap memcg offlining callback, ensuring that the 1404a65b0e76SDomenico Cerasuolo * memcg is not killed when we are reclaiming. 1405a65b0e76SDomenico Cerasuolo */ 1406a65b0e76SDomenico Cerasuolo if (!memcg) { 1407*e35606e4SChengming Zhou spin_unlock(&zswap_shrink_lock); 1408e0228d59SDomenico Cerasuolo if (++failures == MAX_RECLAIM_RETRIES) 1409e0228d59SDomenico Cerasuolo break; 1410a65b0e76SDomenico Cerasuolo 1411a65b0e76SDomenico Cerasuolo goto resched; 1412e0228d59SDomenico Cerasuolo } 1413a65b0e76SDomenico Cerasuolo 1414a65b0e76SDomenico Cerasuolo if (!mem_cgroup_tryget_online(memcg)) { 1415a65b0e76SDomenico Cerasuolo /* drop the reference from mem_cgroup_iter() */ 1416a65b0e76SDomenico Cerasuolo mem_cgroup_iter_break(NULL, memcg); 1417*e35606e4SChengming Zhou zswap_next_shrink = NULL; 1418*e35606e4SChengming Zhou spin_unlock(&zswap_shrink_lock); 1419a65b0e76SDomenico Cerasuolo 1420a65b0e76SDomenico Cerasuolo if (++failures == MAX_RECLAIM_RETRIES) 1421a65b0e76SDomenico Cerasuolo break; 1422a65b0e76SDomenico Cerasuolo 1423a65b0e76SDomenico Cerasuolo goto resched; 1424a65b0e76SDomenico Cerasuolo } 1425*e35606e4SChengming Zhou spin_unlock(&zswap_shrink_lock); 1426a65b0e76SDomenico Cerasuolo 1427a65b0e76SDomenico Cerasuolo ret = shrink_memcg(memcg); 1428a65b0e76SDomenico Cerasuolo /* drop the extra reference */ 1429a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1430a65b0e76SDomenico Cerasuolo 1431a65b0e76SDomenico Cerasuolo if (ret == -EINVAL) 1432a65b0e76SDomenico Cerasuolo break; 1433a65b0e76SDomenico Cerasuolo if (ret && ++failures == MAX_RECLAIM_RETRIES) 1434a65b0e76SDomenico Cerasuolo break; 1435a65b0e76SDomenico Cerasuolo 1436a65b0e76SDomenico Cerasuolo resched: 
1437e0228d59SDomenico Cerasuolo cond_resched(); 1438e0228d59SDomenico Cerasuolo } while (!zswap_can_accept()); 143945190f01SVitaly Wool } 144045190f01SVitaly Wool 1441a85f878bSSrividya Desireddy static int zswap_is_page_same_filled(void *ptr, unsigned long *value) 1442a85f878bSSrividya Desireddy { 1443a85f878bSSrividya Desireddy unsigned long *page; 144462bf1258STaejoon Song unsigned long val; 144562bf1258STaejoon Song unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 1446a85f878bSSrividya Desireddy 1447a85f878bSSrividya Desireddy page = (unsigned long *)ptr; 144862bf1258STaejoon Song val = page[0]; 144962bf1258STaejoon Song 145062bf1258STaejoon Song if (val != page[last_pos]) 145162bf1258STaejoon Song return 0; 145262bf1258STaejoon Song 145362bf1258STaejoon Song for (pos = 1; pos < last_pos; pos++) { 145462bf1258STaejoon Song if (val != page[pos]) 1455a85f878bSSrividya Desireddy return 0; 1456a85f878bSSrividya Desireddy } 145762bf1258STaejoon Song 145862bf1258STaejoon Song *value = val; 145962bf1258STaejoon Song 1460a85f878bSSrividya Desireddy return 1; 1461a85f878bSSrividya Desireddy } 1462a85f878bSSrividya Desireddy 1463a85f878bSSrividya Desireddy static void zswap_fill_page(void *ptr, unsigned long value) 1464a85f878bSSrividya Desireddy { 1465a85f878bSSrividya Desireddy unsigned long *page; 1466a85f878bSSrividya Desireddy 1467a85f878bSSrividya Desireddy page = (unsigned long *)ptr; 1468a85f878bSSrividya Desireddy memset_l(page, value, PAGE_SIZE / sizeof(unsigned long)); 1469a85f878bSSrividya Desireddy } 1470a85f878bSSrividya Desireddy 147134f4c198SMatthew Wilcox (Oracle) bool zswap_store(struct folio *folio) 14722b281117SSeth Jennings { 14733d2c9087SDavid Hildenbrand swp_entry_t swp = folio->swap; 147442c06a0eSJohannes Weiner pgoff_t offset = swp_offset(swp); 147544c7c734SChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp); 14762b281117SSeth Jennings struct zswap_entry *entry, *dupentry; 1477f4840ccfSJohannes Weiner struct obj_cgroup *objcg 
= NULL; 1478a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg = NULL; 147942c06a0eSJohannes Weiner 148034f4c198SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_locked(folio)); 148134f4c198SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); 14822b281117SSeth Jennings 148334f4c198SMatthew Wilcox (Oracle) /* Large folios aren't supported */ 148434f4c198SMatthew Wilcox (Oracle) if (folio_test_large(folio)) 148542c06a0eSJohannes Weiner return false; 14867ba71669SHuang Ying 1487678e54d4SChengming Zhou if (!zswap_enabled) 1488f576a1e8SChengming Zhou goto check_old; 1489678e54d4SChengming Zhou 1490074e3e26SMatthew Wilcox (Oracle) objcg = get_obj_cgroup_from_folio(folio); 1491a65b0e76SDomenico Cerasuolo if (objcg && !obj_cgroup_may_zswap(objcg)) { 1492a65b0e76SDomenico Cerasuolo memcg = get_mem_cgroup_from_objcg(objcg); 1493a65b0e76SDomenico Cerasuolo if (shrink_memcg(memcg)) { 1494a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 14950bdf0efaSNhat Pham goto reject; 1496a65b0e76SDomenico Cerasuolo } 1497a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1498a65b0e76SDomenico Cerasuolo } 1499f4840ccfSJohannes Weiner 15002b281117SSeth Jennings /* reclaim space if needed */ 15012b281117SSeth Jennings if (zswap_is_full()) { 15022b281117SSeth Jennings zswap_pool_limit_hit++; 150345190f01SVitaly Wool zswap_pool_reached_full = true; 1504f4840ccfSJohannes Weiner goto shrink; 15052b281117SSeth Jennings } 150616e536efSLi Wang 150745190f01SVitaly Wool if (zswap_pool_reached_full) { 150842c06a0eSJohannes Weiner if (!zswap_can_accept()) 1509e0228d59SDomenico Cerasuolo goto shrink; 151042c06a0eSJohannes Weiner else 151145190f01SVitaly Wool zswap_pool_reached_full = false; 15122b281117SSeth Jennings } 15132b281117SSeth Jennings 15142b281117SSeth Jennings /* allocate entry */ 1515be7fc97cSJohannes Weiner entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio)); 15162b281117SSeth Jennings if (!entry) { 15172b281117SSeth Jennings 
zswap_reject_kmemcache_fail++; 15182b281117SSeth Jennings goto reject; 15192b281117SSeth Jennings } 15202b281117SSeth Jennings 1521a85f878bSSrividya Desireddy if (zswap_same_filled_pages_enabled) { 1522be7fc97cSJohannes Weiner unsigned long value; 1523be7fc97cSJohannes Weiner u8 *src; 1524be7fc97cSJohannes Weiner 1525be7fc97cSJohannes Weiner src = kmap_local_folio(folio, 0); 1526a85f878bSSrividya Desireddy if (zswap_is_page_same_filled(src, &value)) { 1527003ae2fbSFabio M. De Francesco kunmap_local(src); 1528a85f878bSSrividya Desireddy entry->length = 0; 1529a85f878bSSrividya Desireddy entry->value = value; 1530a85f878bSSrividya Desireddy atomic_inc(&zswap_same_filled_pages); 1531a85f878bSSrividya Desireddy goto insert_entry; 1532a85f878bSSrividya Desireddy } 1533003ae2fbSFabio M. De Francesco kunmap_local(src); 1534a85f878bSSrividya Desireddy } 1535a85f878bSSrividya Desireddy 153642c06a0eSJohannes Weiner if (!zswap_non_same_filled_pages_enabled) 1537cb325dddSMaciej S. Szmigiero goto freepage; 1538cb325dddSMaciej S. 
Szmigiero 1539f1c54846SDan Streetman /* if entry is successfully added, it keeps the reference */ 1540f1c54846SDan Streetman entry->pool = zswap_pool_current_get(); 154142c06a0eSJohannes Weiner if (!entry->pool) 15422b281117SSeth Jennings goto freepage; 15432b281117SSeth Jennings 1544a65b0e76SDomenico Cerasuolo if (objcg) { 1545a65b0e76SDomenico Cerasuolo memcg = get_mem_cgroup_from_objcg(objcg); 1546*e35606e4SChengming Zhou if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) { 1547a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1548a65b0e76SDomenico Cerasuolo goto put_pool; 1549a65b0e76SDomenico Cerasuolo } 1550a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1551a65b0e76SDomenico Cerasuolo } 1552a65b0e76SDomenico Cerasuolo 1553fa9ad6e2SJohannes Weiner if (!zswap_compress(folio, entry)) 1554fa9ad6e2SJohannes Weiner goto put_pool; 15551ec3b5feSBarry Song 1556a85f878bSSrividya Desireddy insert_entry: 1557be7fc97cSJohannes Weiner entry->swpentry = swp; 1558f4840ccfSJohannes Weiner entry->objcg = objcg; 1559f4840ccfSJohannes Weiner if (objcg) { 1560f4840ccfSJohannes Weiner obj_cgroup_charge_zswap(objcg, entry->length); 1561f4840ccfSJohannes Weiner /* Account before objcg ref is moved to tree */ 1562f4840ccfSJohannes Weiner count_objcg_event(objcg, ZSWPOUT); 1563f4840ccfSJohannes Weiner } 1564f4840ccfSJohannes Weiner 15652b281117SSeth Jennings /* map */ 15662b281117SSeth Jennings spin_lock(&tree->lock); 1567ca56489cSDomenico Cerasuolo /* 1568f576a1e8SChengming Zhou * The folio may have been dirtied again, invalidate the 1569f576a1e8SChengming Zhou * possibly stale entry before inserting the new entry. 
1570ca56489cSDomenico Cerasuolo */ 1571f576a1e8SChengming Zhou if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) { 157256c67049SJohannes Weiner zswap_invalidate_entry(tree, dupentry); 1573f576a1e8SChengming Zhou WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry)); 15742b281117SSeth Jennings } 157535499e2bSDomenico Cerasuolo if (entry->length) { 1576a65b0e76SDomenico Cerasuolo INIT_LIST_HEAD(&entry->lru); 1577*e35606e4SChengming Zhou zswap_lru_add(&zswap_list_lru, entry); 1578*e35606e4SChengming Zhou atomic_inc(&zswap_nr_stored); 1579f999f38bSDomenico Cerasuolo } 15802b281117SSeth Jennings spin_unlock(&tree->lock); 15812b281117SSeth Jennings 15822b281117SSeth Jennings /* update stats */ 15832b281117SSeth Jennings atomic_inc(&zswap_stored_pages); 1584f1c54846SDan Streetman zswap_update_total_size(); 1585f6498b77SJohannes Weiner count_vm_event(ZSWPOUT); 15862b281117SSeth Jennings 158742c06a0eSJohannes Weiner return true; 15882b281117SSeth Jennings 1589a65b0e76SDomenico Cerasuolo put_pool: 1590f1c54846SDan Streetman zswap_pool_put(entry->pool); 1591f1c54846SDan Streetman freepage: 15922b281117SSeth Jennings zswap_entry_cache_free(entry); 15932b281117SSeth Jennings reject: 1594f4840ccfSJohannes Weiner if (objcg) 1595f4840ccfSJohannes Weiner obj_cgroup_put(objcg); 1596f576a1e8SChengming Zhou check_old: 1597f576a1e8SChengming Zhou /* 1598f576a1e8SChengming Zhou * If the zswap store fails or zswap is disabled, we must invalidate the 1599f576a1e8SChengming Zhou * possibly stale entry which was previously stored at this offset. 1600f576a1e8SChengming Zhou * Otherwise, writeback could overwrite the new data in the swapfile. 
1601f576a1e8SChengming Zhou */ 1602f576a1e8SChengming Zhou spin_lock(&tree->lock); 1603f576a1e8SChengming Zhou entry = zswap_rb_search(&tree->rbroot, offset); 1604f576a1e8SChengming Zhou if (entry) 1605f576a1e8SChengming Zhou zswap_invalidate_entry(tree, entry); 1606f576a1e8SChengming Zhou spin_unlock(&tree->lock); 160742c06a0eSJohannes Weiner return false; 1608f4840ccfSJohannes Weiner 1609f4840ccfSJohannes Weiner shrink: 1610*e35606e4SChengming Zhou queue_work(shrink_wq, &zswap_shrink_work); 1611f4840ccfSJohannes Weiner goto reject; 16122b281117SSeth Jennings } 16132b281117SSeth Jennings 1614ca54f6d8SMatthew Wilcox (Oracle) bool zswap_load(struct folio *folio) 16152b281117SSeth Jennings { 16163d2c9087SDavid Hildenbrand swp_entry_t swp = folio->swap; 161742c06a0eSJohannes Weiner pgoff_t offset = swp_offset(swp); 1618ca54f6d8SMatthew Wilcox (Oracle) struct page *page = &folio->page; 161944c7c734SChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp); 16202b281117SSeth Jennings struct zswap_entry *entry; 162132acba4cSChengming Zhou u8 *dst; 162242c06a0eSJohannes Weiner 1623ca54f6d8SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_locked(folio)); 16242b281117SSeth Jennings 16252b281117SSeth Jennings spin_lock(&tree->lock); 16265b297f70SJohannes Weiner entry = zswap_rb_search(&tree->rbroot, offset); 16272b281117SSeth Jennings if (!entry) { 16282b281117SSeth Jennings spin_unlock(&tree->lock); 162942c06a0eSJohannes Weiner return false; 16302b281117SSeth Jennings } 1631a230c20eSChengming Zhou zswap_rb_erase(&tree->rbroot, entry); 16322b281117SSeth Jennings spin_unlock(&tree->lock); 16332b281117SSeth Jennings 163466447fd0SChengming Zhou if (entry->length) 1635ff2972aaSJohannes Weiner zswap_decompress(entry, page); 163666447fd0SChengming Zhou else { 1637003ae2fbSFabio M. De Francesco dst = kmap_local_page(page); 1638a85f878bSSrividya Desireddy zswap_fill_page(dst, entry->value); 1639003ae2fbSFabio M. 
De Francesco kunmap_local(dst); 1640a85f878bSSrividya Desireddy } 1641a85f878bSSrividya Desireddy 1642f6498b77SJohannes Weiner count_vm_event(ZSWPIN); 1643f4840ccfSJohannes Weiner if (entry->objcg) 1644f4840ccfSJohannes Weiner count_objcg_event(entry->objcg, ZSWPIN); 1645c75f5c1eSChengming Zhou 1646a230c20eSChengming Zhou zswap_entry_free(entry); 16472b281117SSeth Jennings 1648c2e2ba77SChengming Zhou folio_mark_dirty(folio); 1649c2e2ba77SChengming Zhou 165066447fd0SChengming Zhou return true; 16512b281117SSeth Jennings } 16522b281117SSeth Jennings 16530827a1fbSChengming Zhou void zswap_invalidate(swp_entry_t swp) 16542b281117SSeth Jennings { 16550827a1fbSChengming Zhou pgoff_t offset = swp_offset(swp); 16560827a1fbSChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp); 16572b281117SSeth Jennings struct zswap_entry *entry; 16582b281117SSeth Jennings 16592b281117SSeth Jennings spin_lock(&tree->lock); 16602b281117SSeth Jennings entry = zswap_rb_search(&tree->rbroot, offset); 166106ed2289SJohannes Weiner if (entry) 1662b9c91c43SYosry Ahmed zswap_invalidate_entry(tree, entry); 16632b281117SSeth Jennings spin_unlock(&tree->lock); 16642b281117SSeth Jennings } 16652b281117SSeth Jennings 166644c7c734SChengming Zhou int zswap_swapon(int type, unsigned long nr_pages) 166742c06a0eSJohannes Weiner { 166844c7c734SChengming Zhou struct zswap_tree *trees, *tree; 166944c7c734SChengming Zhou unsigned int nr, i; 167042c06a0eSJohannes Weiner 167144c7c734SChengming Zhou nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); 167244c7c734SChengming Zhou trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL); 167344c7c734SChengming Zhou if (!trees) { 167442c06a0eSJohannes Weiner pr_err("alloc failed, zswap disabled for swap type %d\n", type); 1675bb29fd77SChengming Zhou return -ENOMEM; 167642c06a0eSJohannes Weiner } 167742c06a0eSJohannes Weiner 167844c7c734SChengming Zhou for (i = 0; i < nr; i++) { 167944c7c734SChengming Zhou tree = trees + i; 168042c06a0eSJohannes Weiner tree->rbroot = 
RB_ROOT; 168142c06a0eSJohannes Weiner spin_lock_init(&tree->lock); 168244c7c734SChengming Zhou } 168344c7c734SChengming Zhou 168444c7c734SChengming Zhou nr_zswap_trees[type] = nr; 168544c7c734SChengming Zhou zswap_trees[type] = trees; 1686bb29fd77SChengming Zhou return 0; 168742c06a0eSJohannes Weiner } 168842c06a0eSJohannes Weiner 168942c06a0eSJohannes Weiner void zswap_swapoff(int type) 16902b281117SSeth Jennings { 169144c7c734SChengming Zhou struct zswap_tree *trees = zswap_trees[type]; 169244c7c734SChengming Zhou unsigned int i; 16932b281117SSeth Jennings 169444c7c734SChengming Zhou if (!trees) 16952b281117SSeth Jennings return; 16962b281117SSeth Jennings 169783e68f25SYosry Ahmed /* try_to_unuse() invalidated all the entries already */ 169883e68f25SYosry Ahmed for (i = 0; i < nr_zswap_trees[type]; i++) 169983e68f25SYosry Ahmed WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot)); 170044c7c734SChengming Zhou 170144c7c734SChengming Zhou kvfree(trees); 170244c7c734SChengming Zhou nr_zswap_trees[type] = 0; 1703aa9bca05SWeijie Yang zswap_trees[type] = NULL; 17042b281117SSeth Jennings } 17052b281117SSeth Jennings 17062b281117SSeth Jennings /********************************* 17072b281117SSeth Jennings * debugfs functions 17082b281117SSeth Jennings **********************************/ 17092b281117SSeth Jennings #ifdef CONFIG_DEBUG_FS 17102b281117SSeth Jennings #include <linux/debugfs.h> 17112b281117SSeth Jennings 17122b281117SSeth Jennings static struct dentry *zswap_debugfs_root; 17132b281117SSeth Jennings 1714141fdeecSLiu Shixin static int zswap_debugfs_init(void) 17152b281117SSeth Jennings { 17162b281117SSeth Jennings if (!debugfs_initialized()) 17172b281117SSeth Jennings return -ENODEV; 17182b281117SSeth Jennings 17192b281117SSeth Jennings zswap_debugfs_root = debugfs_create_dir("zswap", NULL); 17202b281117SSeth Jennings 17210825a6f9SJoe Perches debugfs_create_u64("pool_limit_hit", 0444, 17222b281117SSeth Jennings zswap_debugfs_root, &zswap_pool_limit_hit); 
17230825a6f9SJoe Perches debugfs_create_u64("reject_reclaim_fail", 0444, 17242b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_reclaim_fail); 17250825a6f9SJoe Perches debugfs_create_u64("reject_alloc_fail", 0444, 17262b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_alloc_fail); 17270825a6f9SJoe Perches debugfs_create_u64("reject_kmemcache_fail", 0444, 17282b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_kmemcache_fail); 1729cb61dad8SNhat Pham debugfs_create_u64("reject_compress_fail", 0444, 1730cb61dad8SNhat Pham zswap_debugfs_root, &zswap_reject_compress_fail); 17310825a6f9SJoe Perches debugfs_create_u64("reject_compress_poor", 0444, 17322b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_compress_poor); 17330825a6f9SJoe Perches debugfs_create_u64("written_back_pages", 0444, 17342b281117SSeth Jennings zswap_debugfs_root, &zswap_written_back_pages); 17350825a6f9SJoe Perches debugfs_create_u64("pool_total_size", 0444, 173612d79d64SDan Streetman zswap_debugfs_root, &zswap_pool_total_size); 17370825a6f9SJoe Perches debugfs_create_atomic_t("stored_pages", 0444, 17382b281117SSeth Jennings zswap_debugfs_root, &zswap_stored_pages); 1739a85f878bSSrividya Desireddy debugfs_create_atomic_t("same_filled_pages", 0444, 1740a85f878bSSrividya Desireddy zswap_debugfs_root, &zswap_same_filled_pages); 17412b281117SSeth Jennings 17422b281117SSeth Jennings return 0; 17432b281117SSeth Jennings } 17442b281117SSeth Jennings #else 1745141fdeecSLiu Shixin static int zswap_debugfs_init(void) 17462b281117SSeth Jennings { 17472b281117SSeth Jennings return 0; 17482b281117SSeth Jennings } 17492b281117SSeth Jennings #endif 17502b281117SSeth Jennings 17512b281117SSeth Jennings /********************************* 17522b281117SSeth Jennings * module init and exit 17532b281117SSeth Jennings **********************************/ 1754141fdeecSLiu Shixin static int zswap_setup(void) 17552b281117SSeth Jennings { 1756f1c54846SDan Streetman struct zswap_pool *pool; 
1757ad7ed770SSebastian Andrzej Siewior int ret; 175860105e12SMinchan Kim 1759b7919122SLiu Shixin zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); 1760b7919122SLiu Shixin if (!zswap_entry_cache) { 17612b281117SSeth Jennings pr_err("entry cache creation failed\n"); 1762f1c54846SDan Streetman goto cache_fail; 17632b281117SSeth Jennings } 1764f1c54846SDan Streetman 1765cab7a7e5SSebastian Andrzej Siewior ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, 1766cab7a7e5SSebastian Andrzej Siewior "mm/zswap_pool:prepare", 1767cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_prepare, 1768cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_dead); 1769cab7a7e5SSebastian Andrzej Siewior if (ret) 1770cab7a7e5SSebastian Andrzej Siewior goto hp_fail; 1771cab7a7e5SSebastian Andrzej Siewior 1772bf9b7df2SChengming Zhou shrink_wq = alloc_workqueue("zswap-shrink", 1773bf9b7df2SChengming Zhou WQ_UNBOUND|WQ_MEM_RECLAIM, 1); 1774bf9b7df2SChengming Zhou if (!shrink_wq) 1775bf9b7df2SChengming Zhou goto shrink_wq_fail; 1776bf9b7df2SChengming Zhou 1777*e35606e4SChengming Zhou zswap_shrinker = zswap_alloc_shrinker(); 1778*e35606e4SChengming Zhou if (!zswap_shrinker) 1779bf9b7df2SChengming Zhou goto shrinker_fail; 1780*e35606e4SChengming Zhou if (list_lru_init_memcg(&zswap_list_lru, zswap_shrinker)) 1781bf9b7df2SChengming Zhou goto lru_fail; 1782*e35606e4SChengming Zhou shrinker_register(zswap_shrinker); 1783bf9b7df2SChengming Zhou 1784*e35606e4SChengming Zhou INIT_WORK(&zswap_shrink_work, shrink_worker); 1785bf9b7df2SChengming Zhou 1786f1c54846SDan Streetman pool = __zswap_pool_create_fallback(); 1787ae3d89a7SDan Streetman if (pool) { 1788f1c54846SDan Streetman pr_info("loaded using pool %s/%s\n", pool->tfm_name, 1789b8cf32dcSYosry Ahmed zpool_get_type(pool->zpools[0])); 1790f1c54846SDan Streetman list_add(&pool->list, &zswap_pools); 1791ae3d89a7SDan Streetman zswap_has_pool = true; 1792ae3d89a7SDan Streetman } else { 1793ae3d89a7SDan Streetman pr_err("pool creation failed\n"); 
1794ae3d89a7SDan Streetman zswap_enabled = false; 1795ae3d89a7SDan Streetman } 179660105e12SMinchan Kim 17972b281117SSeth Jennings if (zswap_debugfs_init()) 17982b281117SSeth Jennings pr_warn("debugfs initialization failed\n"); 17999021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_SUCCEED; 18002b281117SSeth Jennings return 0; 1801f1c54846SDan Streetman 1802bf9b7df2SChengming Zhou lru_fail: 1803*e35606e4SChengming Zhou shrinker_free(zswap_shrinker); 1804bf9b7df2SChengming Zhou shrinker_fail: 1805bf9b7df2SChengming Zhou destroy_workqueue(shrink_wq); 1806bf9b7df2SChengming Zhou shrink_wq_fail: 1807bf9b7df2SChengming Zhou cpuhp_remove_multi_state(CPUHP_MM_ZSWP_POOL_PREPARE); 1808cab7a7e5SSebastian Andrzej Siewior hp_fail: 1809b7919122SLiu Shixin kmem_cache_destroy(zswap_entry_cache); 1810f1c54846SDan Streetman cache_fail: 1811d7b028f5SDan Streetman /* if built-in, we aren't unloaded on failure; don't allow use */ 18129021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_FAILED; 1813d7b028f5SDan Streetman zswap_enabled = false; 18142b281117SSeth Jennings return -ENOMEM; 18152b281117SSeth Jennings } 1816141fdeecSLiu Shixin 1817141fdeecSLiu Shixin static int __init zswap_init(void) 1818141fdeecSLiu Shixin { 1819141fdeecSLiu Shixin if (!zswap_enabled) 1820141fdeecSLiu Shixin return 0; 1821141fdeecSLiu Shixin return zswap_setup(); 1822141fdeecSLiu Shixin } 18232b281117SSeth Jennings /* must be late so crypto has time to come up */ 1824141fdeecSLiu Shixin late_initcall(zswap_init); 18252b281117SSeth Jennings 182668386da8SSeth Jennings MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>"); 18272b281117SSeth Jennings MODULE_DESCRIPTION("Compressed cache for swap pages"); 1828