1c942fddfSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 22b281117SSeth Jennings /* 32b281117SSeth Jennings * zswap.c - zswap driver file 42b281117SSeth Jennings * 542c06a0eSJohannes Weiner * zswap is a cache that takes pages that are in the process 62b281117SSeth Jennings * of being swapped out and attempts to compress and store them in a 72b281117SSeth Jennings * RAM-based memory pool. This can result in a significant I/O reduction on 82b281117SSeth Jennings * the swap device and, in the case where decompressing from RAM is faster 92b281117SSeth Jennings * than reading from the swap device, can also improve workload performance. 102b281117SSeth Jennings * 112b281117SSeth Jennings * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> 122b281117SSeth Jennings */ 132b281117SSeth Jennings 142b281117SSeth Jennings #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 152b281117SSeth Jennings 162b281117SSeth Jennings #include <linux/module.h> 172b281117SSeth Jennings #include <linux/cpu.h> 182b281117SSeth Jennings #include <linux/highmem.h> 192b281117SSeth Jennings #include <linux/slab.h> 202b281117SSeth Jennings #include <linux/spinlock.h> 212b281117SSeth Jennings #include <linux/types.h> 222b281117SSeth Jennings #include <linux/atomic.h> 232b281117SSeth Jennings #include <linux/rbtree.h> 242b281117SSeth Jennings #include <linux/swap.h> 252b281117SSeth Jennings #include <linux/crypto.h> 261ec3b5feSBarry Song #include <linux/scatterlist.h> 27ddc1a5cbSHugh Dickins #include <linux/mempolicy.h> 282b281117SSeth Jennings #include <linux/mempool.h> 2912d79d64SDan Streetman #include <linux/zpool.h> 301ec3b5feSBarry Song #include <crypto/acompress.h> 3142c06a0eSJohannes Weiner #include <linux/zswap.h> 322b281117SSeth Jennings #include <linux/mm_types.h> 332b281117SSeth Jennings #include <linux/page-flags.h> 342b281117SSeth Jennings #include <linux/swapops.h> 352b281117SSeth Jennings #include <linux/writeback.h> 362b281117SSeth Jennings #include <linux/pagemap.h> 
3745190f01SVitaly Wool #include <linux/workqueue.h> 38a65b0e76SDomenico Cerasuolo #include <linux/list_lru.h> 392b281117SSeth Jennings 40014bb1deSNeilBrown #include "swap.h" 41e0228d59SDomenico Cerasuolo #include "internal.h" 42014bb1deSNeilBrown 432b281117SSeth Jennings /********************************* 442b281117SSeth Jennings * statistics 452b281117SSeth Jennings **********************************/ 4612d79d64SDan Streetman /* Total bytes used by the compressed storage */ 47f6498b77SJohannes Weiner u64 zswap_pool_total_size; 482b281117SSeth Jennings /* The number of compressed pages currently stored in zswap */ 49f6498b77SJohannes Weiner atomic_t zswap_stored_pages = ATOMIC_INIT(0); 50a85f878bSSrividya Desireddy /* The number of same-value filled pages currently stored in zswap */ 51a85f878bSSrividya Desireddy static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0); 522b281117SSeth Jennings 532b281117SSeth Jennings /* 542b281117SSeth Jennings * The statistics below are not protected from concurrent access for 552b281117SSeth Jennings * performance reasons so they may not be a 100% accurate. However, 562b281117SSeth Jennings * they do provide useful information on roughly how many times a 572b281117SSeth Jennings * certain event is occurring. 
582b281117SSeth Jennings */ 592b281117SSeth Jennings 602b281117SSeth Jennings /* Pool limit was hit (see zswap_max_pool_percent) */ 612b281117SSeth Jennings static u64 zswap_pool_limit_hit; 622b281117SSeth Jennings /* Pages written back when pool limit was reached */ 632b281117SSeth Jennings static u64 zswap_written_back_pages; 642b281117SSeth Jennings /* Store failed due to a reclaim failure after pool limit was reached */ 652b281117SSeth Jennings static u64 zswap_reject_reclaim_fail; 66cb61dad8SNhat Pham /* Store failed due to compression algorithm failure */ 67cb61dad8SNhat Pham static u64 zswap_reject_compress_fail; 682b281117SSeth Jennings /* Compressed page was too big for the allocator to (optimally) store */ 692b281117SSeth Jennings static u64 zswap_reject_compress_poor; 702b281117SSeth Jennings /* Store failed because underlying allocator could not get memory */ 712b281117SSeth Jennings static u64 zswap_reject_alloc_fail; 722b281117SSeth Jennings /* Store failed because the entry metadata could not be allocated (rare) */ 732b281117SSeth Jennings static u64 zswap_reject_kmemcache_fail; 742b281117SSeth Jennings 7545190f01SVitaly Wool /* Shrinker work queue */ 7645190f01SVitaly Wool static struct workqueue_struct *shrink_wq; 7745190f01SVitaly Wool /* Pool limit was hit, we need to calm down */ 7845190f01SVitaly Wool static bool zswap_pool_reached_full; 7945190f01SVitaly Wool 802b281117SSeth Jennings /********************************* 812b281117SSeth Jennings * tunables 822b281117SSeth Jennings **********************************/ 83c00ed16aSDan Streetman 84bae21db8SDan Streetman #define ZSWAP_PARAM_UNSET "" 85bae21db8SDan Streetman 86141fdeecSLiu Shixin static int zswap_setup(void); 87141fdeecSLiu Shixin 88bb8b93b5SMaciej S. Szmigiero /* Enable/disable zswap */ 89bb8b93b5SMaciej S. 
Szmigiero static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON); 90d7b028f5SDan Streetman static int zswap_enabled_param_set(const char *, 91d7b028f5SDan Streetman const struct kernel_param *); 9283aed6cdSJoe Perches static const struct kernel_param_ops zswap_enabled_param_ops = { 93d7b028f5SDan Streetman .set = zswap_enabled_param_set, 94d7b028f5SDan Streetman .get = param_get_bool, 95d7b028f5SDan Streetman }; 96d7b028f5SDan Streetman module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); 972b281117SSeth Jennings 9890b0fc26SDan Streetman /* Crypto compressor to use */ 99bb8b93b5SMaciej S. Szmigiero static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; 10090b0fc26SDan Streetman static int zswap_compressor_param_set(const char *, 10190b0fc26SDan Streetman const struct kernel_param *); 10283aed6cdSJoe Perches static const struct kernel_param_ops zswap_compressor_param_ops = { 10390b0fc26SDan Streetman .set = zswap_compressor_param_set, 104c99b42c3SDan Streetman .get = param_get_charp, 105c99b42c3SDan Streetman .free = param_free_charp, 10690b0fc26SDan Streetman }; 10790b0fc26SDan Streetman module_param_cb(compressor, &zswap_compressor_param_ops, 108c99b42c3SDan Streetman &zswap_compressor, 0644); 10990b0fc26SDan Streetman 11090b0fc26SDan Streetman /* Compressed storage zpool to use */ 111bb8b93b5SMaciej S. 
Szmigiero static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; 11290b0fc26SDan Streetman static int zswap_zpool_param_set(const char *, const struct kernel_param *); 11383aed6cdSJoe Perches static const struct kernel_param_ops zswap_zpool_param_ops = { 11490b0fc26SDan Streetman .set = zswap_zpool_param_set, 115c99b42c3SDan Streetman .get = param_get_charp, 116c99b42c3SDan Streetman .free = param_free_charp, 11790b0fc26SDan Streetman }; 118c99b42c3SDan Streetman module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644); 1192b281117SSeth Jennings 1202b281117SSeth Jennings /* The maximum percentage of memory that the compressed pool can occupy */ 1212b281117SSeth Jennings static unsigned int zswap_max_pool_percent = 20; 12290b0fc26SDan Streetman module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644); 12360105e12SMinchan Kim 12445190f01SVitaly Wool /* The threshold for accepting new pages after the max_pool_percent was hit */ 12545190f01SVitaly Wool static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ 12645190f01SVitaly Wool module_param_named(accept_threshold_percent, zswap_accept_thr_percent, 12745190f01SVitaly Wool uint, 0644); 12845190f01SVitaly Wool 129cb325dddSMaciej S. Szmigiero /* 130cb325dddSMaciej S. Szmigiero * Enable/disable handling same-value filled pages (enabled by default). 131cb325dddSMaciej S. Szmigiero * If disabled every page is considered non-same-value filled. 132cb325dddSMaciej S. Szmigiero */ 133a85f878bSSrividya Desireddy static bool zswap_same_filled_pages_enabled = true; 134a85f878bSSrividya Desireddy module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled, 135a85f878bSSrividya Desireddy bool, 0644); 136a85f878bSSrividya Desireddy 137cb325dddSMaciej S. Szmigiero /* Enable/disable handling non-same-value filled pages (enabled by default) */ 138cb325dddSMaciej S. Szmigiero static bool zswap_non_same_filled_pages_enabled = true; 139cb325dddSMaciej S. 
Szmigiero module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled, 140cb325dddSMaciej S. Szmigiero bool, 0644); 141cb325dddSMaciej S. Szmigiero 142b8cf32dcSYosry Ahmed /* Number of zpools in zswap_pool (empirically determined for scalability) */ 143b8cf32dcSYosry Ahmed #define ZSWAP_NR_ZPOOLS 32 144b8cf32dcSYosry Ahmed 145b5ba474fSNhat Pham /* Enable/disable memory pressure-based shrinker. */ 146b5ba474fSNhat Pham static bool zswap_shrinker_enabled = IS_ENABLED( 147b5ba474fSNhat Pham CONFIG_ZSWAP_SHRINKER_DEFAULT_ON); 148b5ba474fSNhat Pham module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644); 149b5ba474fSNhat Pham 150501a06feSNhat Pham bool is_zswap_enabled(void) 151501a06feSNhat Pham { 152501a06feSNhat Pham return zswap_enabled; 153501a06feSNhat Pham } 154501a06feSNhat Pham 1552b281117SSeth Jennings /********************************* 1562b281117SSeth Jennings * data structures 1572b281117SSeth Jennings **********************************/ 158f1c54846SDan Streetman 1591ec3b5feSBarry Song struct crypto_acomp_ctx { 1601ec3b5feSBarry Song struct crypto_acomp *acomp; 1611ec3b5feSBarry Song struct acomp_req *req; 1621ec3b5feSBarry Song struct crypto_wait wait; 1638ba2f844SChengming Zhou u8 *buffer; 1648ba2f844SChengming Zhou struct mutex mutex; 1651ec3b5feSBarry Song }; 1661ec3b5feSBarry Song 167f999f38bSDomenico Cerasuolo /* 168f999f38bSDomenico Cerasuolo * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock. 169f999f38bSDomenico Cerasuolo * The only case where lru_lock is not acquired while holding tree.lock is 170f999f38bSDomenico Cerasuolo * when a zswap_entry is taken off the lru for writeback, in that case it 171f999f38bSDomenico Cerasuolo * needs to be verified that it's still valid in the tree. 
172f999f38bSDomenico Cerasuolo */ 173f1c54846SDan Streetman struct zswap_pool { 174b8cf32dcSYosry Ahmed struct zpool *zpools[ZSWAP_NR_ZPOOLS]; 1751ec3b5feSBarry Song struct crypto_acomp_ctx __percpu *acomp_ctx; 176*94ace3feSChengming Zhou struct percpu_ref ref; 177f1c54846SDan Streetman struct list_head list; 17845190f01SVitaly Wool struct work_struct release_work; 179cab7a7e5SSebastian Andrzej Siewior struct hlist_node node; 180f1c54846SDan Streetman char tfm_name[CRYPTO_MAX_ALG_NAME]; 181f1c54846SDan Streetman }; 182f1c54846SDan Streetman 183bf9b7df2SChengming Zhou static struct { 184bf9b7df2SChengming Zhou struct list_lru list_lru; 185bf9b7df2SChengming Zhou atomic_t nr_stored; 186bf9b7df2SChengming Zhou struct shrinker *shrinker; 187bf9b7df2SChengming Zhou struct work_struct shrink_work; 188bf9b7df2SChengming Zhou struct mem_cgroup *next_shrink; 189bf9b7df2SChengming Zhou /* The lock protects next_shrink. */ 190bf9b7df2SChengming Zhou spinlock_t shrink_lock; 191bf9b7df2SChengming Zhou } zswap; 192bf9b7df2SChengming Zhou 1932b281117SSeth Jennings /* 1942b281117SSeth Jennings * struct zswap_entry 1952b281117SSeth Jennings * 1962b281117SSeth Jennings * This structure contains the metadata for tracking a single compressed 1972b281117SSeth Jennings * page within zswap. 1982b281117SSeth Jennings * 1992b281117SSeth Jennings * rbnode - links the entry into red-black tree for the appropriate swap type 20097157d89SXiu Jianfeng * swpentry - associated swap entry, the offset indexes into the red-black tree 2012b281117SSeth Jennings * length - the length in bytes of the compressed page data. Needed during 202f999f38bSDomenico Cerasuolo * decompression. For a same value filled page length is 0, and both 203f999f38bSDomenico Cerasuolo * pool and lru are invalid and must be ignored. 
204f1c54846SDan Streetman * pool - the zswap_pool the entry's data is in 205f1c54846SDan Streetman * handle - zpool allocation handle that stores the compressed page data 206a85f878bSSrividya Desireddy * value - value of the same-value filled pages which have same content 20797157d89SXiu Jianfeng * objcg - the obj_cgroup that the compressed memory is charged to 208f999f38bSDomenico Cerasuolo * lru - handle to the pool's lru used to evict pages. 2092b281117SSeth Jennings */ 2102b281117SSeth Jennings struct zswap_entry { 2112b281117SSeth Jennings struct rb_node rbnode; 2120bb48849SDomenico Cerasuolo swp_entry_t swpentry; 2132b281117SSeth Jennings unsigned int length; 214f1c54846SDan Streetman struct zswap_pool *pool; 215a85f878bSSrividya Desireddy union { 2162b281117SSeth Jennings unsigned long handle; 217a85f878bSSrividya Desireddy unsigned long value; 218a85f878bSSrividya Desireddy }; 219f4840ccfSJohannes Weiner struct obj_cgroup *objcg; 220f999f38bSDomenico Cerasuolo struct list_head lru; 2212b281117SSeth Jennings }; 2222b281117SSeth Jennings 2232b281117SSeth Jennings struct zswap_tree { 2242b281117SSeth Jennings struct rb_root rbroot; 2252b281117SSeth Jennings spinlock_t lock; 2262b281117SSeth Jennings }; 2272b281117SSeth Jennings 2282b281117SSeth Jennings static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; 22944c7c734SChengming Zhou static unsigned int nr_zswap_trees[MAX_SWAPFILES]; 2302b281117SSeth Jennings 231f1c54846SDan Streetman /* RCU-protected iteration */ 232f1c54846SDan Streetman static LIST_HEAD(zswap_pools); 233f1c54846SDan Streetman /* protects zswap_pools list modification */ 234f1c54846SDan Streetman static DEFINE_SPINLOCK(zswap_pools_lock); 23532a4e169SDan Streetman /* pool counter to provide unique names to zpool */ 23632a4e169SDan Streetman static atomic_t zswap_pools_count = ATOMIC_INIT(0); 237f1c54846SDan Streetman 2389021ccecSLiu Shixin enum zswap_init_type { 2399021ccecSLiu Shixin ZSWAP_UNINIT, 2409021ccecSLiu Shixin ZSWAP_INIT_SUCCEED, 
2419021ccecSLiu Shixin ZSWAP_INIT_FAILED 2429021ccecSLiu Shixin }; 24390b0fc26SDan Streetman 2449021ccecSLiu Shixin static enum zswap_init_type zswap_init_state; 245d7b028f5SDan Streetman 246141fdeecSLiu Shixin /* used to ensure the integrity of initialization */ 247141fdeecSLiu Shixin static DEFINE_MUTEX(zswap_init_lock); 248f1c54846SDan Streetman 249ae3d89a7SDan Streetman /* init completed, but couldn't create the initial pool */ 250ae3d89a7SDan Streetman static bool zswap_has_pool; 251ae3d89a7SDan Streetman 252f1c54846SDan Streetman /********************************* 253f1c54846SDan Streetman * helpers and fwd declarations 254f1c54846SDan Streetman **********************************/ 255f1c54846SDan Streetman 25644c7c734SChengming Zhou static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp) 25744c7c734SChengming Zhou { 25844c7c734SChengming Zhou return &zswap_trees[swp_type(swp)][swp_offset(swp) 25944c7c734SChengming Zhou >> SWAP_ADDRESS_SPACE_SHIFT]; 26044c7c734SChengming Zhou } 26144c7c734SChengming Zhou 262f1c54846SDan Streetman #define zswap_pool_debug(msg, p) \ 263f1c54846SDan Streetman pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ 264b8cf32dcSYosry Ahmed zpool_get_type((p)->zpools[0])) 265f1c54846SDan Streetman 266f1c54846SDan Streetman static bool zswap_is_full(void) 267f1c54846SDan Streetman { 268ca79b0c2SArun KS return totalram_pages() * zswap_max_pool_percent / 100 < 269f1c54846SDan Streetman DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); 270f1c54846SDan Streetman } 271f1c54846SDan Streetman 27245190f01SVitaly Wool static bool zswap_can_accept(void) 27345190f01SVitaly Wool { 27445190f01SVitaly Wool return totalram_pages() * zswap_accept_thr_percent / 100 * 27545190f01SVitaly Wool zswap_max_pool_percent / 100 > 27645190f01SVitaly Wool DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); 27745190f01SVitaly Wool } 27845190f01SVitaly Wool 279b5ba474fSNhat Pham static u64 get_zswap_pool_size(struct zswap_pool *pool) 280b5ba474fSNhat Pham { 
281b5ba474fSNhat Pham u64 pool_size = 0; 282b5ba474fSNhat Pham int i; 283b5ba474fSNhat Pham 284b5ba474fSNhat Pham for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) 285b5ba474fSNhat Pham pool_size += zpool_get_total_size(pool->zpools[i]); 286b5ba474fSNhat Pham 287b5ba474fSNhat Pham return pool_size; 288b5ba474fSNhat Pham } 289b5ba474fSNhat Pham 290f1c54846SDan Streetman static void zswap_update_total_size(void) 291f1c54846SDan Streetman { 292f1c54846SDan Streetman struct zswap_pool *pool; 293f1c54846SDan Streetman u64 total = 0; 294f1c54846SDan Streetman 295f1c54846SDan Streetman rcu_read_lock(); 296f1c54846SDan Streetman 297f1c54846SDan Streetman list_for_each_entry_rcu(pool, &zswap_pools, list) 298b5ba474fSNhat Pham total += get_zswap_pool_size(pool); 299f1c54846SDan Streetman 300f1c54846SDan Streetman rcu_read_unlock(); 301f1c54846SDan Streetman 302f1c54846SDan Streetman zswap_pool_total_size = total; 303f1c54846SDan Streetman } 304f1c54846SDan Streetman 305a984649bSJohannes Weiner /********************************* 306a984649bSJohannes Weiner * pool functions 307a984649bSJohannes Weiner **********************************/ 308*94ace3feSChengming Zhou static void __zswap_pool_empty(struct percpu_ref *ref); 309a984649bSJohannes Weiner 310a984649bSJohannes Weiner static struct zswap_pool *zswap_pool_create(char *type, char *compressor) 311a984649bSJohannes Weiner { 312a984649bSJohannes Weiner int i; 313a984649bSJohannes Weiner struct zswap_pool *pool; 314a984649bSJohannes Weiner char name[38]; /* 'zswap' + 32 char (max) num + \0 */ 315a984649bSJohannes Weiner gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; 316a984649bSJohannes Weiner int ret; 317a984649bSJohannes Weiner 318a984649bSJohannes Weiner if (!zswap_has_pool) { 319a984649bSJohannes Weiner /* if either are unset, pool initialization failed, and we 320a984649bSJohannes Weiner * need both params to be set correctly before trying to 321a984649bSJohannes Weiner * create a pool. 
322a984649bSJohannes Weiner */ 323a984649bSJohannes Weiner if (!strcmp(type, ZSWAP_PARAM_UNSET)) 324a984649bSJohannes Weiner return NULL; 325a984649bSJohannes Weiner if (!strcmp(compressor, ZSWAP_PARAM_UNSET)) 326a984649bSJohannes Weiner return NULL; 327a984649bSJohannes Weiner } 328a984649bSJohannes Weiner 329a984649bSJohannes Weiner pool = kzalloc(sizeof(*pool), GFP_KERNEL); 330a984649bSJohannes Weiner if (!pool) 331a984649bSJohannes Weiner return NULL; 332a984649bSJohannes Weiner 333a984649bSJohannes Weiner for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) { 334a984649bSJohannes Weiner /* unique name for each pool specifically required by zsmalloc */ 335a984649bSJohannes Weiner snprintf(name, 38, "zswap%x", 336a984649bSJohannes Weiner atomic_inc_return(&zswap_pools_count)); 337a984649bSJohannes Weiner 338a984649bSJohannes Weiner pool->zpools[i] = zpool_create_pool(type, name, gfp); 339a984649bSJohannes Weiner if (!pool->zpools[i]) { 340a984649bSJohannes Weiner pr_err("%s zpool not available\n", type); 341a984649bSJohannes Weiner goto error; 342a984649bSJohannes Weiner } 343a984649bSJohannes Weiner } 344a984649bSJohannes Weiner pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0])); 345a984649bSJohannes Weiner 346a984649bSJohannes Weiner strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); 347a984649bSJohannes Weiner 348a984649bSJohannes Weiner pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx); 349a984649bSJohannes Weiner if (!pool->acomp_ctx) { 350a984649bSJohannes Weiner pr_err("percpu alloc failed\n"); 351a984649bSJohannes Weiner goto error; 352a984649bSJohannes Weiner } 353a984649bSJohannes Weiner 354a984649bSJohannes Weiner ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE, 355a984649bSJohannes Weiner &pool->node); 356a984649bSJohannes Weiner if (ret) 357a984649bSJohannes Weiner goto error; 358a984649bSJohannes Weiner 359a984649bSJohannes Weiner /* being the current pool takes 1 ref; this func expects the 360a984649bSJohannes Weiner * caller to 
always add the new pool as the current pool 361a984649bSJohannes Weiner */ 362*94ace3feSChengming Zhou ret = percpu_ref_init(&pool->ref, __zswap_pool_empty, 363*94ace3feSChengming Zhou PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); 364*94ace3feSChengming Zhou if (ret) 365*94ace3feSChengming Zhou goto ref_fail; 366a984649bSJohannes Weiner INIT_LIST_HEAD(&pool->list); 367a984649bSJohannes Weiner 368a984649bSJohannes Weiner zswap_pool_debug("created", pool); 369a984649bSJohannes Weiner 370a984649bSJohannes Weiner return pool; 371a984649bSJohannes Weiner 372*94ace3feSChengming Zhou ref_fail: 373*94ace3feSChengming Zhou cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); 374a984649bSJohannes Weiner error: 375a984649bSJohannes Weiner if (pool->acomp_ctx) 376a984649bSJohannes Weiner free_percpu(pool->acomp_ctx); 377a984649bSJohannes Weiner while (i--) 378a984649bSJohannes Weiner zpool_destroy_pool(pool->zpools[i]); 379a984649bSJohannes Weiner kfree(pool); 380a984649bSJohannes Weiner return NULL; 381a984649bSJohannes Weiner } 382a984649bSJohannes Weiner 383a984649bSJohannes Weiner static struct zswap_pool *__zswap_pool_create_fallback(void) 384a984649bSJohannes Weiner { 385a984649bSJohannes Weiner bool has_comp, has_zpool; 386a984649bSJohannes Weiner 387a984649bSJohannes Weiner has_comp = crypto_has_acomp(zswap_compressor, 0, 0); 388a984649bSJohannes Weiner if (!has_comp && strcmp(zswap_compressor, 389a984649bSJohannes Weiner CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) { 390a984649bSJohannes Weiner pr_err("compressor %s not available, using default %s\n", 391a984649bSJohannes Weiner zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT); 392a984649bSJohannes Weiner param_free_charp(&zswap_compressor); 393a984649bSJohannes Weiner zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; 394a984649bSJohannes Weiner has_comp = crypto_has_acomp(zswap_compressor, 0, 0); 395a984649bSJohannes Weiner } 396a984649bSJohannes Weiner if (!has_comp) { 397a984649bSJohannes Weiner pr_err("default 
compressor %s not available\n", 398a984649bSJohannes Weiner zswap_compressor); 399a984649bSJohannes Weiner param_free_charp(&zswap_compressor); 400a984649bSJohannes Weiner zswap_compressor = ZSWAP_PARAM_UNSET; 401a984649bSJohannes Weiner } 402a984649bSJohannes Weiner 403a984649bSJohannes Weiner has_zpool = zpool_has_pool(zswap_zpool_type); 404a984649bSJohannes Weiner if (!has_zpool && strcmp(zswap_zpool_type, 405a984649bSJohannes Weiner CONFIG_ZSWAP_ZPOOL_DEFAULT)) { 406a984649bSJohannes Weiner pr_err("zpool %s not available, using default %s\n", 407a984649bSJohannes Weiner zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT); 408a984649bSJohannes Weiner param_free_charp(&zswap_zpool_type); 409a984649bSJohannes Weiner zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; 410a984649bSJohannes Weiner has_zpool = zpool_has_pool(zswap_zpool_type); 411a984649bSJohannes Weiner } 412a984649bSJohannes Weiner if (!has_zpool) { 413a984649bSJohannes Weiner pr_err("default zpool %s not available\n", 414a984649bSJohannes Weiner zswap_zpool_type); 415a984649bSJohannes Weiner param_free_charp(&zswap_zpool_type); 416a984649bSJohannes Weiner zswap_zpool_type = ZSWAP_PARAM_UNSET; 417a984649bSJohannes Weiner } 418a984649bSJohannes Weiner 419a984649bSJohannes Weiner if (!has_comp || !has_zpool) 420a984649bSJohannes Weiner return NULL; 421a984649bSJohannes Weiner 422a984649bSJohannes Weiner return zswap_pool_create(zswap_zpool_type, zswap_compressor); 423a984649bSJohannes Weiner } 424a984649bSJohannes Weiner 425a984649bSJohannes Weiner static void zswap_pool_destroy(struct zswap_pool *pool) 426a984649bSJohannes Weiner { 427a984649bSJohannes Weiner int i; 428a984649bSJohannes Weiner 429a984649bSJohannes Weiner zswap_pool_debug("destroying", pool); 430a984649bSJohannes Weiner 431a984649bSJohannes Weiner cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); 432a984649bSJohannes Weiner free_percpu(pool->acomp_ctx); 433a984649bSJohannes Weiner 434a984649bSJohannes Weiner for (i = 0; i < 
ZSWAP_NR_ZPOOLS; i++) 435a984649bSJohannes Weiner zpool_destroy_pool(pool->zpools[i]); 436a984649bSJohannes Weiner kfree(pool); 437a984649bSJohannes Weiner } 438a984649bSJohannes Weiner 43939f3ec8eSJohannes Weiner static void __zswap_pool_release(struct work_struct *work) 44039f3ec8eSJohannes Weiner { 44139f3ec8eSJohannes Weiner struct zswap_pool *pool = container_of(work, typeof(*pool), 44239f3ec8eSJohannes Weiner release_work); 44339f3ec8eSJohannes Weiner 44439f3ec8eSJohannes Weiner synchronize_rcu(); 44539f3ec8eSJohannes Weiner 446*94ace3feSChengming Zhou /* nobody should have been able to get a ref... */ 447*94ace3feSChengming Zhou WARN_ON(!percpu_ref_is_zero(&pool->ref)); 448*94ace3feSChengming Zhou percpu_ref_exit(&pool->ref); 44939f3ec8eSJohannes Weiner 45039f3ec8eSJohannes Weiner /* pool is now off zswap_pools list and has no references. */ 45139f3ec8eSJohannes Weiner zswap_pool_destroy(pool); 45239f3ec8eSJohannes Weiner } 45339f3ec8eSJohannes Weiner 45439f3ec8eSJohannes Weiner static struct zswap_pool *zswap_pool_current(void); 45539f3ec8eSJohannes Weiner 456*94ace3feSChengming Zhou static void __zswap_pool_empty(struct percpu_ref *ref) 45739f3ec8eSJohannes Weiner { 45839f3ec8eSJohannes Weiner struct zswap_pool *pool; 45939f3ec8eSJohannes Weiner 460*94ace3feSChengming Zhou pool = container_of(ref, typeof(*pool), ref); 46139f3ec8eSJohannes Weiner 462*94ace3feSChengming Zhou spin_lock_bh(&zswap_pools_lock); 46339f3ec8eSJohannes Weiner 46439f3ec8eSJohannes Weiner WARN_ON(pool == zswap_pool_current()); 46539f3ec8eSJohannes Weiner 46639f3ec8eSJohannes Weiner list_del_rcu(&pool->list); 46739f3ec8eSJohannes Weiner 46839f3ec8eSJohannes Weiner INIT_WORK(&pool->release_work, __zswap_pool_release); 46939f3ec8eSJohannes Weiner schedule_work(&pool->release_work); 47039f3ec8eSJohannes Weiner 471*94ace3feSChengming Zhou spin_unlock_bh(&zswap_pools_lock); 47239f3ec8eSJohannes Weiner } 47339f3ec8eSJohannes Weiner 47439f3ec8eSJohannes Weiner static int __must_check 
zswap_pool_get(struct zswap_pool *pool) 47539f3ec8eSJohannes Weiner { 47639f3ec8eSJohannes Weiner if (!pool) 47739f3ec8eSJohannes Weiner return 0; 47839f3ec8eSJohannes Weiner 479*94ace3feSChengming Zhou return percpu_ref_tryget(&pool->ref); 48039f3ec8eSJohannes Weiner } 48139f3ec8eSJohannes Weiner 48239f3ec8eSJohannes Weiner static void zswap_pool_put(struct zswap_pool *pool) 48339f3ec8eSJohannes Weiner { 484*94ace3feSChengming Zhou percpu_ref_put(&pool->ref); 48539f3ec8eSJohannes Weiner } 48639f3ec8eSJohannes Weiner 487c1a0ecb8SJohannes Weiner static struct zswap_pool *__zswap_pool_current(void) 488c1a0ecb8SJohannes Weiner { 489c1a0ecb8SJohannes Weiner struct zswap_pool *pool; 490c1a0ecb8SJohannes Weiner 491c1a0ecb8SJohannes Weiner pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list); 492c1a0ecb8SJohannes Weiner WARN_ONCE(!pool && zswap_has_pool, 493c1a0ecb8SJohannes Weiner "%s: no page storage pool!\n", __func__); 494c1a0ecb8SJohannes Weiner 495c1a0ecb8SJohannes Weiner return pool; 496c1a0ecb8SJohannes Weiner } 497c1a0ecb8SJohannes Weiner 498c1a0ecb8SJohannes Weiner static struct zswap_pool *zswap_pool_current(void) 499c1a0ecb8SJohannes Weiner { 500c1a0ecb8SJohannes Weiner assert_spin_locked(&zswap_pools_lock); 501c1a0ecb8SJohannes Weiner 502c1a0ecb8SJohannes Weiner return __zswap_pool_current(); 503c1a0ecb8SJohannes Weiner } 504c1a0ecb8SJohannes Weiner 505c1a0ecb8SJohannes Weiner static struct zswap_pool *zswap_pool_current_get(void) 506c1a0ecb8SJohannes Weiner { 507c1a0ecb8SJohannes Weiner struct zswap_pool *pool; 508c1a0ecb8SJohannes Weiner 509c1a0ecb8SJohannes Weiner rcu_read_lock(); 510c1a0ecb8SJohannes Weiner 511c1a0ecb8SJohannes Weiner pool = __zswap_pool_current(); 512c1a0ecb8SJohannes Weiner if (!zswap_pool_get(pool)) 513c1a0ecb8SJohannes Weiner pool = NULL; 514c1a0ecb8SJohannes Weiner 515c1a0ecb8SJohannes Weiner rcu_read_unlock(); 516c1a0ecb8SJohannes Weiner 517c1a0ecb8SJohannes Weiner return pool; 518c1a0ecb8SJohannes Weiner } 
519c1a0ecb8SJohannes Weiner 520c1a0ecb8SJohannes Weiner /* type and compressor must be null-terminated */ 521c1a0ecb8SJohannes Weiner static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) 522c1a0ecb8SJohannes Weiner { 523c1a0ecb8SJohannes Weiner struct zswap_pool *pool; 524c1a0ecb8SJohannes Weiner 525c1a0ecb8SJohannes Weiner assert_spin_locked(&zswap_pools_lock); 526c1a0ecb8SJohannes Weiner 527c1a0ecb8SJohannes Weiner list_for_each_entry_rcu(pool, &zswap_pools, list) { 528c1a0ecb8SJohannes Weiner if (strcmp(pool->tfm_name, compressor)) 529c1a0ecb8SJohannes Weiner continue; 530c1a0ecb8SJohannes Weiner /* all zpools share the same type */ 531c1a0ecb8SJohannes Weiner if (strcmp(zpool_get_type(pool->zpools[0]), type)) 532c1a0ecb8SJohannes Weiner continue; 533c1a0ecb8SJohannes Weiner /* if we can't get it, it's about to be destroyed */ 534c1a0ecb8SJohannes Weiner if (!zswap_pool_get(pool)) 535c1a0ecb8SJohannes Weiner continue; 536c1a0ecb8SJohannes Weiner return pool; 537c1a0ecb8SJohannes Weiner } 538c1a0ecb8SJohannes Weiner 539c1a0ecb8SJohannes Weiner return NULL; 540c1a0ecb8SJohannes Weiner } 541c1a0ecb8SJohannes Weiner 542abca07c0SJohannes Weiner /********************************* 543abca07c0SJohannes Weiner * param callbacks 544abca07c0SJohannes Weiner **********************************/ 545abca07c0SJohannes Weiner 546abca07c0SJohannes Weiner static bool zswap_pool_changed(const char *s, const struct kernel_param *kp) 547abca07c0SJohannes Weiner { 548abca07c0SJohannes Weiner /* no change required */ 549abca07c0SJohannes Weiner if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool) 550abca07c0SJohannes Weiner return false; 551abca07c0SJohannes Weiner return true; 552abca07c0SJohannes Weiner } 553abca07c0SJohannes Weiner 554abca07c0SJohannes Weiner /* val must be a null-terminated string */ 555abca07c0SJohannes Weiner static int __zswap_param_set(const char *val, const struct kernel_param *kp, 556abca07c0SJohannes Weiner char *type, char 
*compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret = 0;
	bool new_pool = false;

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		/* if this is load-time (pre-init) param setting,
		 * don't create a pool; that's done during init.
		 */
		ret = param_set_charp(s, kp);
		break;
	case ZSWAP_INIT_SUCCEED:
		new_pool = zswap_pool_changed(s, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't set param, initialization failed\n");
		ret = -ENODEV;
	}
	mutex_unlock(&zswap_init_lock);

	/* no need to create a new pool, return directly */
	if (!new_pool)
		return ret;

	/*
	 * Exactly one of @type / @compressor is NULL - the NULL one is the
	 * parameter being changed (see the two wrappers below). Validate the
	 * new value @s and use it as that half of the (type, compressor) pair.
	 */
	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock_bh(&zswap_pools_lock);

	/*
	 * If a pool with the requested (type, compressor) combination already
	 * exists, grab a reference and unlink it so it can be re-inserted as
	 * the current pool below.
	 */
	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock_bh(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);
	else {
		/*
		 * Restore the initial ref dropped by percpu_ref_kill()
		 * when the pool was decommissioned and switch it again
		 * to percpu mode.
		 */
		percpu_ref_resurrect(&pool->ref);

		/* Drop the ref from zswap_pool_find_get(). */
		zswap_pool_put(pool);
	}

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock_bh(&zswap_pools_lock);

	if (!ret) {
		/* success: the new pool becomes current (front of the list) */
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock_bh(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		percpu_ref_kill(&put_pool->ref);

	return ret;
}

/* Module param set handler for zswap.compressor; the zpool type is kept. */
static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

/* Module param set handler for zswap.zpool; the compressor is kept. */
static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

/*
 * Module param set handler for zswap.enabled. Enabling at runtime triggers
 * deferred initialization (zswap_setup()) if it hasn't run yet, and refuses
 * to enable when no pool is configured or initialization failed.
 */
static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	int ret = -ENODEV;

	/* if this is load-time (pre-init) param setting, only set param. */
	if (system_state != SYSTEM_RUNNING)
		return param_set_bool(val, kp);

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		if (zswap_setup())
			break;
		fallthrough;
	case ZSWAP_INIT_SUCCEED:
		if (!zswap_has_pool)
			pr_err("can't enable, no pool configured\n");
		else
			ret = param_set_bool(val, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't enable, initialization failed\n");
	}
	mutex_unlock(&zswap_init_lock);

	return ret;
}

/*********************************
* lru functions
**********************************/

/* should be called under RCU */
#ifdef CONFIG_MEMCG
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{
	return entry->objcg ?
		obj_cgroup_memcg(entry->objcg) : NULL;
}
#else
/* !CONFIG_MEMCG: entries are never associated with a memcg */
static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
{
	return NULL;
}
#endif

/* NUMA node holding the zswap_entry object itself */
static inline int entry_to_nid(struct zswap_entry *entry)
{
	return page_to_nid(virt_to_page(entry));
}

/*
 * Add @entry to the memcg- and node-aware zswap LRU and bump the lruvec's
 * nr_zswap_protected counter, which shields recently stored entries from
 * being written back too eagerly.
 */
static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
{
	atomic_long_t *nr_zswap_protected;
	unsigned long lru_size, old, new;
	int nid = entry_to_nid(entry);
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;

	/*
	 * Note that it is safe to use rcu_read_lock() here, even in the face of
	 * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection
	 * used in list_lru lookup, only two scenarios are possible:
	 *
	 * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The
	 *    new entry will be reparented to memcg's parent's list_lru.
	 * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The
	 *    new entry will be added directly to memcg's parent's list_lru.
	 *
	 * Similar reasoning holds for list_lru_del().
	 */
	rcu_read_lock();
	memcg = mem_cgroup_from_entry(entry);
	/* will always succeed */
	list_lru_add(list_lru, &entry->lru, nid, memcg);

	/* Update the protection area */
	lru_size = list_lru_count_one(list_lru, nid, memcg);
	lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
	nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected;
	old = atomic_long_inc_return(nr_zswap_protected);
	/*
	 * Decay to avoid overflow and adapt to changing workloads.
	 * This is based on LRU reclaim cost decaying heuristics.
	 */
	do {
		new = old > lru_size / 4 ? old / 2 : old;
	} while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new));
	rcu_read_unlock();
}

/* Remove @entry from the memcg- and node-aware zswap LRU. */
static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
{
	int nid = entry_to_nid(entry);
	struct mem_cgroup *memcg;

	rcu_read_lock();
	memcg = mem_cgroup_from_entry(entry);
	/* will always succeed */
	list_lru_del(list_lru, &entry->lru, nid, memcg);
	rcu_read_unlock();
}

/* Reset the lruvec's zswap protection counter to zero. */
void zswap_lruvec_state_init(struct lruvec *lruvec)
{
	atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
}

/*
 * A folio being swapped back in counts as recent reuse: bump the lruvec's
 * nr_zswap_protected counter so the zswap shrinker backs off.
 */
void zswap_folio_swapin(struct folio *folio)
{
	struct lruvec *lruvec;

	if (folio) {
		lruvec = folio_lruvec(folio);
		atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
	}
}

/* Forget a memcg that is going offline so the shrinker doesn't hold it. */
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
{
	/* lock out zswap shrinker walking memcg tree */
	spin_lock(&zswap.shrink_lock);
	if (zswap.next_shrink == memcg)
		zswap.next_shrink = mem_cgroup_iter(NULL, zswap.next_shrink, NULL);
	spin_unlock(&zswap.shrink_lock);
}

/*********************************
* rbtree functions
**********************************/
/*
 * Find the entry with swap offset @offset in the tree at @root, or NULL.
 * Callers in this file take the tree lock before searching.
 */
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;
	pgoff_t entry_offset;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		entry_offset = swp_offset(entry->swpentry);
		if (entry_offset > offset)
			node = node->rb_left;
		else if (entry_offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			   struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;
	pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		myentry_offset = swp_offset(myentry->swpentry);
		if (myentry_offset > entry_offset)
			link = &(*link)->rb_left;
		else if (myentry_offset < entry_offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

/* Unlink @entry from the tree and mark its node detached. */
static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	rb_erase(&entry->rbnode,
root); 8610ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 8620ab0abcfSWeijie Yang } 8630ab0abcfSWeijie Yang 86436034bf6SJohannes Weiner /********************************* 86536034bf6SJohannes Weiner * zswap entry functions 86636034bf6SJohannes Weiner **********************************/ 86736034bf6SJohannes Weiner static struct kmem_cache *zswap_entry_cache; 86836034bf6SJohannes Weiner 86936034bf6SJohannes Weiner static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid) 87036034bf6SJohannes Weiner { 87136034bf6SJohannes Weiner struct zswap_entry *entry; 87236034bf6SJohannes Weiner entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid); 87336034bf6SJohannes Weiner if (!entry) 87436034bf6SJohannes Weiner return NULL; 87536034bf6SJohannes Weiner RB_CLEAR_NODE(&entry->rbnode); 87636034bf6SJohannes Weiner return entry; 87736034bf6SJohannes Weiner } 87836034bf6SJohannes Weiner 87936034bf6SJohannes Weiner static void zswap_entry_cache_free(struct zswap_entry *entry) 88036034bf6SJohannes Weiner { 88136034bf6SJohannes Weiner kmem_cache_free(zswap_entry_cache, entry); 88236034bf6SJohannes Weiner } 88336034bf6SJohannes Weiner 884b8cf32dcSYosry Ahmed static struct zpool *zswap_find_zpool(struct zswap_entry *entry) 885b8cf32dcSYosry Ahmed { 886b8cf32dcSYosry Ahmed int i = 0; 887b8cf32dcSYosry Ahmed 888b8cf32dcSYosry Ahmed if (ZSWAP_NR_ZPOOLS > 1) 889b8cf32dcSYosry Ahmed i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS)); 890b8cf32dcSYosry Ahmed 891b8cf32dcSYosry Ahmed return entry->pool->zpools[i]; 892b8cf32dcSYosry Ahmed } 893b8cf32dcSYosry Ahmed 8940ab0abcfSWeijie Yang /* 89512d79d64SDan Streetman * Carries out the common pattern of freeing and entry's zpool allocation, 8960ab0abcfSWeijie Yang * freeing the entry itself, and decrementing the number of stored pages. 
 */
static void zswap_entry_free(struct zswap_entry *entry)
{
	if (!entry->length)
		/* zero-length entries (same-filled pages) hold no zpool data */
		atomic_dec(&zswap_same_filled_pages);
	else {
		zswap_lru_del(&zswap.list_lru, entry);
		zpool_free(zswap_find_zpool(entry), entry->handle);
		atomic_dec(&zswap.nr_stored);
		zswap_pool_put(entry->pool);
	}
	if (entry->objcg) {
		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
		obj_cgroup_put(entry->objcg);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/*
 * The caller holds the tree lock and has looked @entry up in the tree,
 * so it must be on the tree: remove it from the tree and free it.
 */
static void zswap_invalidate_entry(struct zswap_tree *tree,
				   struct zswap_entry *entry)
{
	zswap_rb_erase(&tree->rbroot, entry);
	zswap_entry_free(entry);
}

/*********************************
* compressed storage functions
**********************************/
/*
 * Set up the per-CPU compression context for @pool on @cpu: a two-page
 * scratch buffer, an acomp transform and a request, wired for synchronous
 * waiting via acomp_ctx->wait. Returns 0 on success or a negative errno,
 * unwinding any partial allocations on failure.
 */
static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	int ret;

	mutex_init(&acomp_ctx->mutex);

	/* two pages: output may exceed PAGE_SIZE, see zswap_compress() */
	acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!acomp_ctx->buffer)
		return -ENOMEM;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
		       pool->tfm_name, PTR_ERR(acomp));
		ret = PTR_ERR(acomp);
		goto acomp_fail;
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		ret = -ENOMEM;
		goto req_fail;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
	 * won't be called, crypto_wait_req() will return without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	return 0;

req_fail:
	crypto_free_acomp(acomp_ctx->acomp);
acomp_fail:
	kfree(acomp_ctx->buffer);
	return ret;
}

/*
 * Teardown counterpart of zswap_cpu_comp_prepare(): release the request,
 * the transform and the scratch buffer if they were set up.
 */
static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
		kfree(acomp_ctx->buffer);
	}

	return 0;
}

/*
 * Compress the contents of @folio and stash the result in a zpool
 * allocation recorded in @entry (entry->handle / entry->length).
 * Returns true on success; on failure, bumps the matching reject
 * counter and returns false.
 */
static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
{
	struct crypto_acomp_ctx *acomp_ctx;
	struct scatterlist input, output;
	int comp_ret = 0, alloc_ret = 0;
	unsigned int dlen = PAGE_SIZE;
	unsigned long handle;
	struct zpool *zpool;
	char *buf;
	gfp_t gfp;
	u8 *dst;

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(&acomp_ctx->mutex);

	dst = acomp_ctx->buffer;
	sg_init_table(&input, 1);
	sg_set_page(&input, &folio->page, PAGE_SIZE, 0);

	/*
	 * We need PAGE_SIZE * 2 here since there maybe over-compression case,
	 * and hardware-accelerators may won't check the dst buffer size, so
	 * giving the dst buffer with enough length to avoid buffer overflow.
	 */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);

	/*
	 * it maybe looks a little bit silly that we send an asynchronous request,
	 * then wait for its completion synchronously. This makes the process look
	 * synchronous in fact.
	 * Theoretically, acomp supports users send multiple acomp requests in one
	 * acomp instance, then get those requests done simultaneously. but in this
	 * case, zswap actually does store and load page by page, there is no
	 * existing method to send the second page before the first page is done
	 * in one thread doing zwap.
	 * but in different threads running on different cpu, we have different
	 * acomp instance, so multiple threads can do (de)compression in parallel.
	 */
	comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;
	if (comp_ret)
		goto unlock;

	zpool = zswap_find_zpool(entry);
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	alloc_ret = zpool_malloc(zpool, dlen, gfp, &handle);
	if (alloc_ret)
		goto unlock;

	/* copy the compressed data from the scratch buffer into the zpool */
	buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, dst, dlen);
	zpool_unmap_handle(zpool, handle);

	entry->handle = handle;
	entry->length = dlen;

unlock:
	/* -ENOSPC from either step means the page compresses poorly */
	if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC)
		zswap_reject_compress_poor++;
	else if (comp_ret)
		zswap_reject_compress_fail++;
	else if (alloc_ret)
		zswap_reject_alloc_fail++;

	mutex_unlock(&acomp_ctx->mutex);
	return comp_ret == 0 && alloc_ret == 0;
}

/*
 * Decompress @entry's data into @page. Decompression failure is treated
 * as fatal (BUG_ON), since the stored data was produced by
 * zswap_compress() and must decompress back to exactly one page.
 */
static void zswap_decompress(struct zswap_entry *entry, struct page *page)
{
	struct zpool *zpool = zswap_find_zpool(entry);
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src;

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(&acomp_ctx->mutex);

	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
	if (!zpool_can_sleep_mapped(zpool)) {
		/*
		 * The zpool mapping can't be held across a sleeping
		 * decompress: copy the data out to the per-CPU scratch
		 * buffer and unmap right away.
		 */
		memcpy(acomp_ctx->buffer, src, entry->length);
		src = acomp_ctx->buffer;
		zpool_unmap_handle(zpool, entry->handle);
	}

	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
	mutex_unlock(&acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(zpool))
		zpool_unmap_handle(zpool, entry->handle);
}

/*********************************
* writeback code
**********************************/
/*
 * Attempts to free an entry by adding a folio to the swap cache,
 * decompressing the entry data into the folio, and issuing a
 * bio write to write the folio back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the folio
 * to the swap device. We are basically resuming the same swap
 * writeback path that was intercepted with the zswap_store()
 * in the first place. After the folio has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
/*
 * Returns 0 once writeback has been started, -EEXIST if the folio was
 * already in the swap cache (raced with swapin or a concurrent shrinker),
 * and -ENOMEM if the folio could not be allocated or the entry was
 * invalidated behind our back.
 */
static int zswap_writeback_entry(struct zswap_entry *entry,
				 swp_entry_t swpentry)
{
	struct zswap_tree *tree;
	struct folio *folio;
	struct mempolicy *mpol;
	bool folio_was_allocated;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* try to allocate swap cache folio */
	mpol = get_task_policy(current);
	folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
				NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
	if (!folio)
		return -ENOMEM;

	/*
	 * Found an existing folio, we raced with swapin or concurrent
	 * shrinker. We generally writeback cold folios from zswap, and
	 * swapin means the folio just became hot, so skip this folio.
	 * For unlikely concurrent shrinker case, it will be unlinked
	 * and freed when invalidated by the concurrent shrinker anyway.
	 */
	if (!folio_was_allocated) {
		folio_put(folio);
		return -EEXIST;
	}

	/*
	 * folio is locked, and the swapcache is now secured against
	 * concurrent swapping to and from the slot, and concurrent
	 * swapoff so we can safely dereference the zswap tree here.
	 * Verify that the swap entry hasn't been invalidated and recycled
	 * behind our backs, to avoid overwriting a new swap folio with
	 * old compressed data. Only when this is successful can the entry
	 * be dereferenced.
	 */
	tree = swap_zswap_tree(swpentry);
	spin_lock(&tree->lock);
	if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
		spin_unlock(&tree->lock);
		delete_from_swap_cache(folio);
		folio_unlock(folio);
		folio_put(folio);
		return -ENOMEM;
	}

	/* Safe to deref entry after the entry is verified above. */
	zswap_rb_erase(&tree->rbroot, entry);
	spin_unlock(&tree->lock);

	zswap_decompress(entry, &folio->page);

	count_vm_event(ZSWPWB);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPWB);

	zswap_entry_free(entry);

	/* folio is up to date */
	folio_mark_uptodate(folio);

	/* move it to the tail of the inactive list after end_writeback */
	folio_set_reclaim(folio);

	/* start writeback */
	__swap_writepage(folio, &wbc);
	folio_put(folio);

	return 0;
}

/*********************************
* shrinker functions
**********************************/
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
				       spinlock_t *lock, void *arg)
{
	struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
	bool *encountered_page_in_swapcache = (bool *)arg;
	swp_entry_t swpentry;
	enum lru_status ret = LRU_REMOVED_RETRY;
	int writeback_result;

	/*
	 * As soon as we drop the LRU lock, the entry can be
freed by 1201f9c0f1c3SChengming Zhou 	 * a concurrent invalidation. This means the following:
1202eb23ee4fSJohannes Weiner 	 *
1203f9c0f1c3SChengming Zhou 	 * 1. We extract the swp_entry_t to the stack, allowing
1204f9c0f1c3SChengming Zhou 	 *    zswap_writeback_entry() to pin the swap entry and
1205f9c0f1c3SChengming Zhou 	 *    then validate the zswap entry against that swap entry's
1206f9c0f1c3SChengming Zhou 	 *    tree using pointer value comparison. Only when that
1207f9c0f1c3SChengming Zhou 	 *    is successful can the entry be dereferenced.
1208f9c0f1c3SChengming Zhou 	 *
1209f9c0f1c3SChengming Zhou 	 * 2. Usually, objects are taken off the LRU for reclaim. In
1210f9c0f1c3SChengming Zhou 	 *    this case this isn't possible, because if reclaim fails
1211f9c0f1c3SChengming Zhou 	 *    for whatever reason, we have no means of knowing if the
1212f9c0f1c3SChengming Zhou 	 *    entry is alive to put it back on the LRU.
1213f9c0f1c3SChengming Zhou 	 *
1214f9c0f1c3SChengming Zhou 	 * So rotate it before dropping the lock. If the entry is
1215f9c0f1c3SChengming Zhou 	 * written back or invalidated, the free path will unlink
1216f9c0f1c3SChengming Zhou 	 * it. For failures, rotation is the right thing as well.
1217eb23ee4fSJohannes Weiner 	 *
1218eb23ee4fSJohannes Weiner 	 * Temporary failures, where the same entry should be tried
1219eb23ee4fSJohannes Weiner 	 * again immediately, almost never happen for this shrinker.
1220eb23ee4fSJohannes Weiner 	 * We don't do any trylocking; -ENOMEM comes closest,
1221eb23ee4fSJohannes Weiner 	 * but that's extremely rare and doesn't happen spuriously
1222eb23ee4fSJohannes Weiner 	 * either. Don't bother distinguishing this case.
1223eb23ee4fSJohannes Weiner 	 */
1224eb23ee4fSJohannes Weiner 	list_move_tail(item, &l->list);
1225eb23ee4fSJohannes Weiner 
1226eb23ee4fSJohannes Weiner 	/*
1227eb23ee4fSJohannes Weiner 	 * Once the lru lock is dropped, the entry might get freed.
The 1228eb23ee4fSJohannes Weiner * swpentry is copied to the stack, and entry isn't deref'd again 1229eb23ee4fSJohannes Weiner * until the entry is verified to still be alive in the tree. 1230eb23ee4fSJohannes Weiner */ 1231eb23ee4fSJohannes Weiner swpentry = entry->swpentry; 1232eb23ee4fSJohannes Weiner 1233eb23ee4fSJohannes Weiner /* 1234eb23ee4fSJohannes Weiner * It's safe to drop the lock here because we return either 1235eb23ee4fSJohannes Weiner * LRU_REMOVED_RETRY or LRU_RETRY. 1236eb23ee4fSJohannes Weiner */ 1237eb23ee4fSJohannes Weiner spin_unlock(lock); 1238eb23ee4fSJohannes Weiner 1239eb23ee4fSJohannes Weiner writeback_result = zswap_writeback_entry(entry, swpentry); 1240eb23ee4fSJohannes Weiner 1241eb23ee4fSJohannes Weiner if (writeback_result) { 1242eb23ee4fSJohannes Weiner zswap_reject_reclaim_fail++; 1243eb23ee4fSJohannes Weiner ret = LRU_RETRY; 1244eb23ee4fSJohannes Weiner 1245eb23ee4fSJohannes Weiner /* 1246eb23ee4fSJohannes Weiner * Encountering a page already in swap cache is a sign that we are shrinking 1247eb23ee4fSJohannes Weiner * into the warmer region. We should terminate shrinking (if we're in the dynamic 1248eb23ee4fSJohannes Weiner * shrinker context). 
1249eb23ee4fSJohannes Weiner */ 1250b49547adSChengming Zhou if (writeback_result == -EEXIST && encountered_page_in_swapcache) { 1251b49547adSChengming Zhou ret = LRU_STOP; 1252eb23ee4fSJohannes Weiner *encountered_page_in_swapcache = true; 1253b49547adSChengming Zhou } 1254eb23ee4fSJohannes Weiner } else { 1255eb23ee4fSJohannes Weiner zswap_written_back_pages++; 1256eb23ee4fSJohannes Weiner } 1257eb23ee4fSJohannes Weiner 1258eb23ee4fSJohannes Weiner spin_lock(lock); 1259eb23ee4fSJohannes Weiner return ret; 1260eb23ee4fSJohannes Weiner } 1261b5ba474fSNhat Pham 1262b5ba474fSNhat Pham static unsigned long zswap_shrinker_scan(struct shrinker *shrinker, 1263b5ba474fSNhat Pham struct shrink_control *sc) 1264b5ba474fSNhat Pham { 1265b5ba474fSNhat Pham struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid)); 1266b5ba474fSNhat Pham unsigned long shrink_ret, nr_protected, lru_size; 1267b5ba474fSNhat Pham bool encountered_page_in_swapcache = false; 1268b5ba474fSNhat Pham 1269501a06feSNhat Pham if (!zswap_shrinker_enabled || 1270501a06feSNhat Pham !mem_cgroup_zswap_writeback_enabled(sc->memcg)) { 1271b5ba474fSNhat Pham sc->nr_scanned = 0; 1272b5ba474fSNhat Pham return SHRINK_STOP; 1273b5ba474fSNhat Pham } 1274b5ba474fSNhat Pham 1275b5ba474fSNhat Pham nr_protected = 1276b5ba474fSNhat Pham atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected); 1277bf9b7df2SChengming Zhou lru_size = list_lru_shrink_count(&zswap.list_lru, sc); 1278b5ba474fSNhat Pham 1279b5ba474fSNhat Pham /* 1280b5ba474fSNhat Pham * Abort if we are shrinking into the protected region. 
1281b5ba474fSNhat Pham 	 *
1282b5ba474fSNhat Pham 	 * This short-circuiting is necessary because if we have too many
1283b5ba474fSNhat Pham 	 * concurrent reclaimers getting the freeable zswap object counts at the
1284b5ba474fSNhat Pham 	 * same time (before any of them made reasonable progress), the total
1285b5ba474fSNhat Pham 	 * number of reclaimed objects might be more than the number of unprotected
1286b5ba474fSNhat Pham 	 * objects (i.e. the reclaimers will reclaim into the protected area of the
1287b5ba474fSNhat Pham 	 * zswap LRU).
1288b5ba474fSNhat Pham 	 */
1289b5ba474fSNhat Pham 	if (nr_protected >= lru_size - sc->nr_to_scan) {
1290b5ba474fSNhat Pham 		sc->nr_scanned = 0;
1291b5ba474fSNhat Pham 		return SHRINK_STOP;
1292b5ba474fSNhat Pham 	}
1293b5ba474fSNhat Pham 
1294bf9b7df2SChengming Zhou 	shrink_ret = list_lru_shrink_walk(&zswap.list_lru, sc, &shrink_memcg_cb,
1295b5ba474fSNhat Pham 		&encountered_page_in_swapcache);
1296b5ba474fSNhat Pham 
1297b5ba474fSNhat Pham 	if (encountered_page_in_swapcache)
1298b5ba474fSNhat Pham 		return SHRINK_STOP;
1299b5ba474fSNhat Pham 
1300b5ba474fSNhat Pham 	return shrink_ret ?
shrink_ret : SHRINK_STOP; 1301b5ba474fSNhat Pham } 1302b5ba474fSNhat Pham 1303b5ba474fSNhat Pham static unsigned long zswap_shrinker_count(struct shrinker *shrinker, 1304b5ba474fSNhat Pham struct shrink_control *sc) 1305b5ba474fSNhat Pham { 1306b5ba474fSNhat Pham struct mem_cgroup *memcg = sc->memcg; 1307b5ba474fSNhat Pham struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid)); 1308b5ba474fSNhat Pham unsigned long nr_backing, nr_stored, nr_freeable, nr_protected; 1309b5ba474fSNhat Pham 1310501a06feSNhat Pham if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) 1311b5ba474fSNhat Pham return 0; 1312b5ba474fSNhat Pham 1313b5ba474fSNhat Pham #ifdef CONFIG_MEMCG_KMEM 13147d7ef0a4SYosry Ahmed mem_cgroup_flush_stats(memcg); 1315b5ba474fSNhat Pham nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; 1316b5ba474fSNhat Pham nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); 1317b5ba474fSNhat Pham #else 1318b5ba474fSNhat Pham /* use pool stats instead of memcg stats */ 1319bf9b7df2SChengming Zhou nr_backing = zswap_pool_total_size >> PAGE_SHIFT; 1320bf9b7df2SChengming Zhou nr_stored = atomic_read(&zswap.nr_stored); 1321b5ba474fSNhat Pham #endif 1322b5ba474fSNhat Pham 1323b5ba474fSNhat Pham if (!nr_stored) 1324b5ba474fSNhat Pham return 0; 1325b5ba474fSNhat Pham 1326b5ba474fSNhat Pham nr_protected = 1327b5ba474fSNhat Pham atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected); 1328bf9b7df2SChengming Zhou nr_freeable = list_lru_shrink_count(&zswap.list_lru, sc); 1329b5ba474fSNhat Pham /* 1330b5ba474fSNhat Pham * Subtract the lru size by an estimate of the number of pages 1331b5ba474fSNhat Pham * that should be protected. 1332b5ba474fSNhat Pham */ 1333b5ba474fSNhat Pham nr_freeable = nr_freeable > nr_protected ? nr_freeable - nr_protected : 0; 1334b5ba474fSNhat Pham 1335b5ba474fSNhat Pham /* 1336b5ba474fSNhat Pham * Scale the number of freeable pages by the memory saving factor. 
1337b5ba474fSNhat Pham 	 * This ensures that the better zswap compresses memory, the fewer
1338b5ba474fSNhat Pham 	 * pages we will evict to swap (as it will otherwise incur IO for
1339b5ba474fSNhat Pham 	 * relatively small memory saving).
1340b5ba474fSNhat Pham 	 */
1341b5ba474fSNhat Pham 	return mult_frac(nr_freeable, nr_backing, nr_stored);
1342b5ba474fSNhat Pham }
1343b5ba474fSNhat Pham 
/*
 * Allocate and configure the zswap shrinker (NUMA- and memcg-aware).
 * The caller registers it with shrinker_register(); see zswap_setup().
 * Returns NULL on allocation failure.
 */
1344bf9b7df2SChengming Zhou static struct shrinker *zswap_alloc_shrinker(void)
1345b5ba474fSNhat Pham {
1346bf9b7df2SChengming Zhou 	struct shrinker *shrinker;
1347b5ba474fSNhat Pham 
1348bf9b7df2SChengming Zhou 	shrinker =
1349bf9b7df2SChengming Zhou 		shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap");
1350bf9b7df2SChengming Zhou 	if (!shrinker)
1351bf9b7df2SChengming Zhou 		return NULL;
1352bf9b7df2SChengming Zhou 
1353bf9b7df2SChengming Zhou 	shrinker->scan_objects = zswap_shrinker_scan;
1354bf9b7df2SChengming Zhou 	shrinker->count_objects = zswap_shrinker_count;
1355bf9b7df2SChengming Zhou 	shrinker->batch = 0;
1356bf9b7df2SChengming Zhou 	shrinker->seeks = DEFAULT_SEEKS;
1357bf9b7df2SChengming Zhou 	return shrinker;
1358b5ba474fSNhat Pham }
1359b5ba474fSNhat Pham 
/*
 * Try to write back one zswap entry per eligible node for @memcg.
 * Returns 0 if anything was reclaimed, -EINVAL if writeback is disabled
 * for this memcg, -ENOENT for an offline (zombie) memcg, -EAGAIN otherwise.
 */
1360a65b0e76SDomenico Cerasuolo static int shrink_memcg(struct mem_cgroup *memcg)
1361a65b0e76SDomenico Cerasuolo {
1362a65b0e76SDomenico Cerasuolo 	int nid, shrunk = 0;
1363a65b0e76SDomenico Cerasuolo 
1364501a06feSNhat Pham 	if (!mem_cgroup_zswap_writeback_enabled(memcg))
1365501a06feSNhat Pham 		return -EINVAL;
1366501a06feSNhat Pham 
1367a65b0e76SDomenico Cerasuolo 	/*
1368a65b0e76SDomenico Cerasuolo 	 * Skip zombies because their LRUs are reparented and we would be
1369a65b0e76SDomenico Cerasuolo 	 * reclaiming from the parent instead of the dead memcg.
1370a65b0e76SDomenico Cerasuolo */ 1371a65b0e76SDomenico Cerasuolo if (memcg && !mem_cgroup_online(memcg)) 1372a65b0e76SDomenico Cerasuolo return -ENOENT; 1373a65b0e76SDomenico Cerasuolo 1374a65b0e76SDomenico Cerasuolo for_each_node_state(nid, N_NORMAL_MEMORY) { 1375a65b0e76SDomenico Cerasuolo unsigned long nr_to_walk = 1; 1376a65b0e76SDomenico Cerasuolo 1377bf9b7df2SChengming Zhou shrunk += list_lru_walk_one(&zswap.list_lru, nid, memcg, 1378a65b0e76SDomenico Cerasuolo &shrink_memcg_cb, NULL, &nr_to_walk); 1379a65b0e76SDomenico Cerasuolo } 1380a65b0e76SDomenico Cerasuolo return shrunk ? 0 : -EAGAIN; 1381f999f38bSDomenico Cerasuolo } 1382f999f38bSDomenico Cerasuolo 138345190f01SVitaly Wool static void shrink_worker(struct work_struct *w) 138445190f01SVitaly Wool { 1385a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 1386e0228d59SDomenico Cerasuolo int ret, failures = 0; 138745190f01SVitaly Wool 1388a65b0e76SDomenico Cerasuolo /* global reclaim will select cgroup in a round-robin fashion. */ 1389e0228d59SDomenico Cerasuolo do { 1390bf9b7df2SChengming Zhou spin_lock(&zswap.shrink_lock); 1391bf9b7df2SChengming Zhou zswap.next_shrink = mem_cgroup_iter(NULL, zswap.next_shrink, NULL); 1392bf9b7df2SChengming Zhou memcg = zswap.next_shrink; 1393a65b0e76SDomenico Cerasuolo 1394a65b0e76SDomenico Cerasuolo /* 1395a65b0e76SDomenico Cerasuolo * We need to retry if we have gone through a full round trip, or if we 1396a65b0e76SDomenico Cerasuolo * got an offline memcg (or else we risk undoing the effect of the 1397a65b0e76SDomenico Cerasuolo * zswap memcg offlining cleanup callback). This is not catastrophic 1398a65b0e76SDomenico Cerasuolo * per se, but it will keep the now offlined memcg hostage for a while. 
1399a65b0e76SDomenico Cerasuolo * 1400a65b0e76SDomenico Cerasuolo * Note that if we got an online memcg, we will keep the extra 1401a65b0e76SDomenico Cerasuolo * reference in case the original reference obtained by mem_cgroup_iter 1402a65b0e76SDomenico Cerasuolo * is dropped by the zswap memcg offlining callback, ensuring that the 1403a65b0e76SDomenico Cerasuolo * memcg is not killed when we are reclaiming. 1404a65b0e76SDomenico Cerasuolo */ 1405a65b0e76SDomenico Cerasuolo if (!memcg) { 1406bf9b7df2SChengming Zhou spin_unlock(&zswap.shrink_lock); 1407e0228d59SDomenico Cerasuolo if (++failures == MAX_RECLAIM_RETRIES) 1408e0228d59SDomenico Cerasuolo break; 1409a65b0e76SDomenico Cerasuolo 1410a65b0e76SDomenico Cerasuolo goto resched; 1411e0228d59SDomenico Cerasuolo } 1412a65b0e76SDomenico Cerasuolo 1413a65b0e76SDomenico Cerasuolo if (!mem_cgroup_tryget_online(memcg)) { 1414a65b0e76SDomenico Cerasuolo /* drop the reference from mem_cgroup_iter() */ 1415a65b0e76SDomenico Cerasuolo mem_cgroup_iter_break(NULL, memcg); 1416bf9b7df2SChengming Zhou zswap.next_shrink = NULL; 1417bf9b7df2SChengming Zhou spin_unlock(&zswap.shrink_lock); 1418a65b0e76SDomenico Cerasuolo 1419a65b0e76SDomenico Cerasuolo if (++failures == MAX_RECLAIM_RETRIES) 1420a65b0e76SDomenico Cerasuolo break; 1421a65b0e76SDomenico Cerasuolo 1422a65b0e76SDomenico Cerasuolo goto resched; 1423a65b0e76SDomenico Cerasuolo } 1424bf9b7df2SChengming Zhou spin_unlock(&zswap.shrink_lock); 1425a65b0e76SDomenico Cerasuolo 1426a65b0e76SDomenico Cerasuolo ret = shrink_memcg(memcg); 1427a65b0e76SDomenico Cerasuolo /* drop the extra reference */ 1428a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1429a65b0e76SDomenico Cerasuolo 1430a65b0e76SDomenico Cerasuolo if (ret == -EINVAL) 1431a65b0e76SDomenico Cerasuolo break; 1432a65b0e76SDomenico Cerasuolo if (ret && ++failures == MAX_RECLAIM_RETRIES) 1433a65b0e76SDomenico Cerasuolo break; 1434a65b0e76SDomenico Cerasuolo 1435a65b0e76SDomenico Cerasuolo resched: 
1436e0228d59SDomenico Cerasuolo resched:
1437e0228d59SDomenico Cerasuolo 		cond_resched();
1438e0228d59SDomenico Cerasuolo 	} while (!zswap_can_accept());
143945190f01SVitaly Wool }
144045190f01SVitaly Wool 
/*
 * Return 1 iff the PAGE_SIZE buffer at @ptr consists of a single repeated
 * machine word, storing that word in *@value; return 0 otherwise.
 * The last word is compared first as a cheap early-out before scanning.
 */
1440a85f878bSSrividya Desireddy static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
1441a85f878bSSrividya Desireddy {
1442a85f878bSSrividya Desireddy 	unsigned long *page;
144362bf1258STaejoon Song 	unsigned long val;
144462bf1258STaejoon Song 	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
1445a85f878bSSrividya Desireddy 
1446a85f878bSSrividya Desireddy 	page = (unsigned long *)ptr;
144762bf1258STaejoon Song 	val = page[0];
144862bf1258STaejoon Song 
144962bf1258STaejoon Song 	if (val != page[last_pos])
145062bf1258STaejoon Song 		return 0;
145162bf1258STaejoon Song 
145262bf1258STaejoon Song 	for (pos = 1; pos < last_pos; pos++) {
145362bf1258STaejoon Song 		if (val != page[pos])
1454a85f878bSSrividya Desireddy 			return 0;
1455a85f878bSSrividya Desireddy 	}
145662bf1258STaejoon Song 
145762bf1258STaejoon Song 	*value = val;
145862bf1258STaejoon Song 
1459a85f878bSSrividya Desireddy 	return 1;
1460a85f878bSSrividya Desireddy }
1461a85f878bSSrividya Desireddy 
/*
 * Reconstruct a same-filled page: fill the PAGE_SIZE buffer at @ptr with
 * @value repeated (the inverse of zswap_is_page_same_filled()).
 */
1462a85f878bSSrividya Desireddy static void zswap_fill_page(void *ptr, unsigned long value)
1463a85f878bSSrividya Desireddy {
1464a85f878bSSrividya Desireddy 	unsigned long *page;
1465a85f878bSSrividya Desireddy 
1466a85f878bSSrividya Desireddy 	page = (unsigned long *)ptr;
1467a85f878bSSrividya Desireddy 	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
1468a85f878bSSrividya Desireddy }
1469a85f878bSSrividya Desireddy 
147034f4c198SMatthew Wilcox (Oracle) bool zswap_store(struct folio *folio)
14712b281117SSeth Jennings {
14723d2c9087SDavid Hildenbrand 	swp_entry_t swp = folio->swap;
147342c06a0eSJohannes Weiner 	pgoff_t offset = swp_offset(swp);
147444c7c734SChengming Zhou 	struct zswap_tree *tree = swap_zswap_tree(swp);
14752b281117SSeth Jennings 	struct zswap_entry *entry, *dupentry;
1476f4840ccfSJohannes Weiner 	struct obj_cgroup *objcg
= NULL; 1477a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg = NULL; 147842c06a0eSJohannes Weiner 147934f4c198SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_locked(folio)); 148034f4c198SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); 14812b281117SSeth Jennings 148234f4c198SMatthew Wilcox (Oracle) /* Large folios aren't supported */ 148334f4c198SMatthew Wilcox (Oracle) if (folio_test_large(folio)) 148442c06a0eSJohannes Weiner return false; 14857ba71669SHuang Ying 1486678e54d4SChengming Zhou if (!zswap_enabled) 1487f576a1e8SChengming Zhou goto check_old; 1488678e54d4SChengming Zhou 1489074e3e26SMatthew Wilcox (Oracle) objcg = get_obj_cgroup_from_folio(folio); 1490a65b0e76SDomenico Cerasuolo if (objcg && !obj_cgroup_may_zswap(objcg)) { 1491a65b0e76SDomenico Cerasuolo memcg = get_mem_cgroup_from_objcg(objcg); 1492a65b0e76SDomenico Cerasuolo if (shrink_memcg(memcg)) { 1493a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 14940bdf0efaSNhat Pham goto reject; 1495a65b0e76SDomenico Cerasuolo } 1496a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1497a65b0e76SDomenico Cerasuolo } 1498f4840ccfSJohannes Weiner 14992b281117SSeth Jennings /* reclaim space if needed */ 15002b281117SSeth Jennings if (zswap_is_full()) { 15012b281117SSeth Jennings zswap_pool_limit_hit++; 150245190f01SVitaly Wool zswap_pool_reached_full = true; 1503f4840ccfSJohannes Weiner goto shrink; 15042b281117SSeth Jennings } 150516e536efSLi Wang 150645190f01SVitaly Wool if (zswap_pool_reached_full) { 150742c06a0eSJohannes Weiner if (!zswap_can_accept()) 1508e0228d59SDomenico Cerasuolo goto shrink; 150942c06a0eSJohannes Weiner else 151045190f01SVitaly Wool zswap_pool_reached_full = false; 15112b281117SSeth Jennings } 15122b281117SSeth Jennings 15132b281117SSeth Jennings /* allocate entry */ 1514be7fc97cSJohannes Weiner entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio)); 15152b281117SSeth Jennings if (!entry) { 15162b281117SSeth Jennings 
zswap_reject_kmemcache_fail++; 15172b281117SSeth Jennings goto reject; 15182b281117SSeth Jennings } 15192b281117SSeth Jennings 1520a85f878bSSrividya Desireddy if (zswap_same_filled_pages_enabled) { 1521be7fc97cSJohannes Weiner unsigned long value; 1522be7fc97cSJohannes Weiner u8 *src; 1523be7fc97cSJohannes Weiner 1524be7fc97cSJohannes Weiner src = kmap_local_folio(folio, 0); 1525a85f878bSSrividya Desireddy if (zswap_is_page_same_filled(src, &value)) { 1526003ae2fbSFabio M. De Francesco kunmap_local(src); 1527a85f878bSSrividya Desireddy entry->length = 0; 1528a85f878bSSrividya Desireddy entry->value = value; 1529a85f878bSSrividya Desireddy atomic_inc(&zswap_same_filled_pages); 1530a85f878bSSrividya Desireddy goto insert_entry; 1531a85f878bSSrividya Desireddy } 1532003ae2fbSFabio M. De Francesco kunmap_local(src); 1533a85f878bSSrividya Desireddy } 1534a85f878bSSrividya Desireddy 153542c06a0eSJohannes Weiner if (!zswap_non_same_filled_pages_enabled) 1536cb325dddSMaciej S. Szmigiero goto freepage; 1537cb325dddSMaciej S. 
Szmigiero 1538f1c54846SDan Streetman /* if entry is successfully added, it keeps the reference */ 1539f1c54846SDan Streetman entry->pool = zswap_pool_current_get(); 154042c06a0eSJohannes Weiner if (!entry->pool) 15412b281117SSeth Jennings goto freepage; 15422b281117SSeth Jennings 1543a65b0e76SDomenico Cerasuolo if (objcg) { 1544a65b0e76SDomenico Cerasuolo memcg = get_mem_cgroup_from_objcg(objcg); 1545bf9b7df2SChengming Zhou if (memcg_list_lru_alloc(memcg, &zswap.list_lru, GFP_KERNEL)) { 1546a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1547a65b0e76SDomenico Cerasuolo goto put_pool; 1548a65b0e76SDomenico Cerasuolo } 1549a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1550a65b0e76SDomenico Cerasuolo } 1551a65b0e76SDomenico Cerasuolo 1552fa9ad6e2SJohannes Weiner if (!zswap_compress(folio, entry)) 1553fa9ad6e2SJohannes Weiner goto put_pool; 15541ec3b5feSBarry Song 1555a85f878bSSrividya Desireddy insert_entry: 1556be7fc97cSJohannes Weiner entry->swpentry = swp; 1557f4840ccfSJohannes Weiner entry->objcg = objcg; 1558f4840ccfSJohannes Weiner if (objcg) { 1559f4840ccfSJohannes Weiner obj_cgroup_charge_zswap(objcg, entry->length); 1560f4840ccfSJohannes Weiner /* Account before objcg ref is moved to tree */ 1561f4840ccfSJohannes Weiner count_objcg_event(objcg, ZSWPOUT); 1562f4840ccfSJohannes Weiner } 1563f4840ccfSJohannes Weiner 15642b281117SSeth Jennings /* map */ 15652b281117SSeth Jennings spin_lock(&tree->lock); 1566ca56489cSDomenico Cerasuolo /* 1567f576a1e8SChengming Zhou * The folio may have been dirtied again, invalidate the 1568f576a1e8SChengming Zhou * possibly stale entry before inserting the new entry. 
1569ca56489cSDomenico Cerasuolo */ 1570f576a1e8SChengming Zhou if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) { 157156c67049SJohannes Weiner zswap_invalidate_entry(tree, dupentry); 1572f576a1e8SChengming Zhou WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry)); 15732b281117SSeth Jennings } 157435499e2bSDomenico Cerasuolo if (entry->length) { 1575a65b0e76SDomenico Cerasuolo INIT_LIST_HEAD(&entry->lru); 1576bf9b7df2SChengming Zhou zswap_lru_add(&zswap.list_lru, entry); 1577bf9b7df2SChengming Zhou atomic_inc(&zswap.nr_stored); 1578f999f38bSDomenico Cerasuolo } 15792b281117SSeth Jennings spin_unlock(&tree->lock); 15802b281117SSeth Jennings 15812b281117SSeth Jennings /* update stats */ 15822b281117SSeth Jennings atomic_inc(&zswap_stored_pages); 1583f1c54846SDan Streetman zswap_update_total_size(); 1584f6498b77SJohannes Weiner count_vm_event(ZSWPOUT); 15852b281117SSeth Jennings 158642c06a0eSJohannes Weiner return true; 15872b281117SSeth Jennings 1588a65b0e76SDomenico Cerasuolo put_pool: 1589f1c54846SDan Streetman zswap_pool_put(entry->pool); 1590f1c54846SDan Streetman freepage: 15912b281117SSeth Jennings zswap_entry_cache_free(entry); 15922b281117SSeth Jennings reject: 1593f4840ccfSJohannes Weiner if (objcg) 1594f4840ccfSJohannes Weiner obj_cgroup_put(objcg); 1595f576a1e8SChengming Zhou check_old: 1596f576a1e8SChengming Zhou /* 1597f576a1e8SChengming Zhou * If the zswap store fails or zswap is disabled, we must invalidate the 1598f576a1e8SChengming Zhou * possibly stale entry which was previously stored at this offset. 1599f576a1e8SChengming Zhou * Otherwise, writeback could overwrite the new data in the swapfile. 
1600f576a1e8SChengming Zhou */ 1601f576a1e8SChengming Zhou spin_lock(&tree->lock); 1602f576a1e8SChengming Zhou entry = zswap_rb_search(&tree->rbroot, offset); 1603f576a1e8SChengming Zhou if (entry) 1604f576a1e8SChengming Zhou zswap_invalidate_entry(tree, entry); 1605f576a1e8SChengming Zhou spin_unlock(&tree->lock); 160642c06a0eSJohannes Weiner return false; 1607f4840ccfSJohannes Weiner 1608f4840ccfSJohannes Weiner shrink: 1609bf9b7df2SChengming Zhou queue_work(shrink_wq, &zswap.shrink_work); 1610f4840ccfSJohannes Weiner goto reject; 16112b281117SSeth Jennings } 16122b281117SSeth Jennings 1613ca54f6d8SMatthew Wilcox (Oracle) bool zswap_load(struct folio *folio) 16142b281117SSeth Jennings { 16153d2c9087SDavid Hildenbrand swp_entry_t swp = folio->swap; 161642c06a0eSJohannes Weiner pgoff_t offset = swp_offset(swp); 1617ca54f6d8SMatthew Wilcox (Oracle) struct page *page = &folio->page; 161844c7c734SChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp); 16192b281117SSeth Jennings struct zswap_entry *entry; 162032acba4cSChengming Zhou u8 *dst; 162142c06a0eSJohannes Weiner 1622ca54f6d8SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_locked(folio)); 16232b281117SSeth Jennings 16242b281117SSeth Jennings spin_lock(&tree->lock); 16255b297f70SJohannes Weiner entry = zswap_rb_search(&tree->rbroot, offset); 16262b281117SSeth Jennings if (!entry) { 16272b281117SSeth Jennings spin_unlock(&tree->lock); 162842c06a0eSJohannes Weiner return false; 16292b281117SSeth Jennings } 1630a230c20eSChengming Zhou zswap_rb_erase(&tree->rbroot, entry); 16312b281117SSeth Jennings spin_unlock(&tree->lock); 16322b281117SSeth Jennings 163366447fd0SChengming Zhou if (entry->length) 1634ff2972aaSJohannes Weiner zswap_decompress(entry, page); 163566447fd0SChengming Zhou else { 1636003ae2fbSFabio M. De Francesco dst = kmap_local_page(page); 1637a85f878bSSrividya Desireddy zswap_fill_page(dst, entry->value); 1638003ae2fbSFabio M. 
De Francesco kunmap_local(dst); 1639a85f878bSSrividya Desireddy } 1640a85f878bSSrividya Desireddy 1641f6498b77SJohannes Weiner count_vm_event(ZSWPIN); 1642f4840ccfSJohannes Weiner if (entry->objcg) 1643f4840ccfSJohannes Weiner count_objcg_event(entry->objcg, ZSWPIN); 1644c75f5c1eSChengming Zhou 1645a230c20eSChengming Zhou zswap_entry_free(entry); 16462b281117SSeth Jennings 1647c2e2ba77SChengming Zhou folio_mark_dirty(folio); 1648c2e2ba77SChengming Zhou 164966447fd0SChengming Zhou return true; 16502b281117SSeth Jennings } 16512b281117SSeth Jennings 16520827a1fbSChengming Zhou void zswap_invalidate(swp_entry_t swp) 16532b281117SSeth Jennings { 16540827a1fbSChengming Zhou pgoff_t offset = swp_offset(swp); 16550827a1fbSChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp); 16562b281117SSeth Jennings struct zswap_entry *entry; 16572b281117SSeth Jennings 16582b281117SSeth Jennings spin_lock(&tree->lock); 16592b281117SSeth Jennings entry = zswap_rb_search(&tree->rbroot, offset); 166006ed2289SJohannes Weiner if (entry) 1661b9c91c43SYosry Ahmed zswap_invalidate_entry(tree, entry); 16622b281117SSeth Jennings spin_unlock(&tree->lock); 16632b281117SSeth Jennings } 16642b281117SSeth Jennings 166544c7c734SChengming Zhou int zswap_swapon(int type, unsigned long nr_pages) 166642c06a0eSJohannes Weiner { 166744c7c734SChengming Zhou struct zswap_tree *trees, *tree; 166844c7c734SChengming Zhou unsigned int nr, i; 166942c06a0eSJohannes Weiner 167044c7c734SChengming Zhou nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); 167144c7c734SChengming Zhou trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL); 167244c7c734SChengming Zhou if (!trees) { 167342c06a0eSJohannes Weiner pr_err("alloc failed, zswap disabled for swap type %d\n", type); 1674bb29fd77SChengming Zhou return -ENOMEM; 167542c06a0eSJohannes Weiner } 167642c06a0eSJohannes Weiner 167744c7c734SChengming Zhou for (i = 0; i < nr; i++) { 167844c7c734SChengming Zhou tree = trees + i; 167942c06a0eSJohannes Weiner tree->rbroot = 
RB_ROOT; 168042c06a0eSJohannes Weiner spin_lock_init(&tree->lock); 168144c7c734SChengming Zhou } 168244c7c734SChengming Zhou 168344c7c734SChengming Zhou nr_zswap_trees[type] = nr; 168444c7c734SChengming Zhou zswap_trees[type] = trees; 1685bb29fd77SChengming Zhou return 0; 168642c06a0eSJohannes Weiner } 168742c06a0eSJohannes Weiner 168842c06a0eSJohannes Weiner void zswap_swapoff(int type) 16892b281117SSeth Jennings { 169044c7c734SChengming Zhou struct zswap_tree *trees = zswap_trees[type]; 169144c7c734SChengming Zhou unsigned int i; 16922b281117SSeth Jennings 169344c7c734SChengming Zhou if (!trees) 16942b281117SSeth Jennings return; 16952b281117SSeth Jennings 169683e68f25SYosry Ahmed /* try_to_unuse() invalidated all the entries already */ 169783e68f25SYosry Ahmed for (i = 0; i < nr_zswap_trees[type]; i++) 169883e68f25SYosry Ahmed WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot)); 169944c7c734SChengming Zhou 170044c7c734SChengming Zhou kvfree(trees); 170144c7c734SChengming Zhou nr_zswap_trees[type] = 0; 1702aa9bca05SWeijie Yang zswap_trees[type] = NULL; 17032b281117SSeth Jennings } 17042b281117SSeth Jennings 17052b281117SSeth Jennings /********************************* 17062b281117SSeth Jennings * debugfs functions 17072b281117SSeth Jennings **********************************/ 17082b281117SSeth Jennings #ifdef CONFIG_DEBUG_FS 17092b281117SSeth Jennings #include <linux/debugfs.h> 17102b281117SSeth Jennings 17112b281117SSeth Jennings static struct dentry *zswap_debugfs_root; 17122b281117SSeth Jennings 1713141fdeecSLiu Shixin static int zswap_debugfs_init(void) 17142b281117SSeth Jennings { 17152b281117SSeth Jennings if (!debugfs_initialized()) 17162b281117SSeth Jennings return -ENODEV; 17172b281117SSeth Jennings 17182b281117SSeth Jennings zswap_debugfs_root = debugfs_create_dir("zswap", NULL); 17192b281117SSeth Jennings 17200825a6f9SJoe Perches debugfs_create_u64("pool_limit_hit", 0444, 17212b281117SSeth Jennings zswap_debugfs_root, &zswap_pool_limit_hit); 
17220825a6f9SJoe Perches debugfs_create_u64("reject_reclaim_fail", 0444, 17232b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_reclaim_fail); 17240825a6f9SJoe Perches debugfs_create_u64("reject_alloc_fail", 0444, 17252b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_alloc_fail); 17260825a6f9SJoe Perches debugfs_create_u64("reject_kmemcache_fail", 0444, 17272b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_kmemcache_fail); 1728cb61dad8SNhat Pham debugfs_create_u64("reject_compress_fail", 0444, 1729cb61dad8SNhat Pham zswap_debugfs_root, &zswap_reject_compress_fail); 17300825a6f9SJoe Perches debugfs_create_u64("reject_compress_poor", 0444, 17312b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_compress_poor); 17320825a6f9SJoe Perches debugfs_create_u64("written_back_pages", 0444, 17332b281117SSeth Jennings zswap_debugfs_root, &zswap_written_back_pages); 17340825a6f9SJoe Perches debugfs_create_u64("pool_total_size", 0444, 173512d79d64SDan Streetman zswap_debugfs_root, &zswap_pool_total_size); 17360825a6f9SJoe Perches debugfs_create_atomic_t("stored_pages", 0444, 17372b281117SSeth Jennings zswap_debugfs_root, &zswap_stored_pages); 1738a85f878bSSrividya Desireddy debugfs_create_atomic_t("same_filled_pages", 0444, 1739a85f878bSSrividya Desireddy zswap_debugfs_root, &zswap_same_filled_pages); 17402b281117SSeth Jennings 17412b281117SSeth Jennings return 0; 17422b281117SSeth Jennings } 17432b281117SSeth Jennings #else 1744141fdeecSLiu Shixin static int zswap_debugfs_init(void) 17452b281117SSeth Jennings { 17462b281117SSeth Jennings return 0; 17472b281117SSeth Jennings } 17482b281117SSeth Jennings #endif 17492b281117SSeth Jennings 17502b281117SSeth Jennings /********************************* 17512b281117SSeth Jennings * module init and exit 17522b281117SSeth Jennings **********************************/ 1753141fdeecSLiu Shixin static int zswap_setup(void) 17542b281117SSeth Jennings { 1755f1c54846SDan Streetman struct zswap_pool *pool; 
1756ad7ed770SSebastian Andrzej Siewior int ret; 175760105e12SMinchan Kim 1758b7919122SLiu Shixin zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); 1759b7919122SLiu Shixin if (!zswap_entry_cache) { 17602b281117SSeth Jennings pr_err("entry cache creation failed\n"); 1761f1c54846SDan Streetman goto cache_fail; 17622b281117SSeth Jennings } 1763f1c54846SDan Streetman 1764cab7a7e5SSebastian Andrzej Siewior ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, 1765cab7a7e5SSebastian Andrzej Siewior "mm/zswap_pool:prepare", 1766cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_prepare, 1767cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_dead); 1768cab7a7e5SSebastian Andrzej Siewior if (ret) 1769cab7a7e5SSebastian Andrzej Siewior goto hp_fail; 1770cab7a7e5SSebastian Andrzej Siewior 1771bf9b7df2SChengming Zhou shrink_wq = alloc_workqueue("zswap-shrink", 1772bf9b7df2SChengming Zhou WQ_UNBOUND|WQ_MEM_RECLAIM, 1); 1773bf9b7df2SChengming Zhou if (!shrink_wq) 1774bf9b7df2SChengming Zhou goto shrink_wq_fail; 1775bf9b7df2SChengming Zhou 1776bf9b7df2SChengming Zhou zswap.shrinker = zswap_alloc_shrinker(); 1777bf9b7df2SChengming Zhou if (!zswap.shrinker) 1778bf9b7df2SChengming Zhou goto shrinker_fail; 1779bf9b7df2SChengming Zhou if (list_lru_init_memcg(&zswap.list_lru, zswap.shrinker)) 1780bf9b7df2SChengming Zhou goto lru_fail; 1781bf9b7df2SChengming Zhou shrinker_register(zswap.shrinker); 1782bf9b7df2SChengming Zhou 1783bf9b7df2SChengming Zhou INIT_WORK(&zswap.shrink_work, shrink_worker); 1784bf9b7df2SChengming Zhou atomic_set(&zswap.nr_stored, 0); 1785bf9b7df2SChengming Zhou spin_lock_init(&zswap.shrink_lock); 1786bf9b7df2SChengming Zhou 1787f1c54846SDan Streetman pool = __zswap_pool_create_fallback(); 1788ae3d89a7SDan Streetman if (pool) { 1789f1c54846SDan Streetman pr_info("loaded using pool %s/%s\n", pool->tfm_name, 1790b8cf32dcSYosry Ahmed zpool_get_type(pool->zpools[0])); 1791f1c54846SDan Streetman list_add(&pool->list, &zswap_pools); 1792ae3d89a7SDan Streetman 
zswap_has_pool = true; 1793ae3d89a7SDan Streetman } else { 1794ae3d89a7SDan Streetman pr_err("pool creation failed\n"); 1795ae3d89a7SDan Streetman zswap_enabled = false; 1796ae3d89a7SDan Streetman } 179760105e12SMinchan Kim 17982b281117SSeth Jennings if (zswap_debugfs_init()) 17992b281117SSeth Jennings pr_warn("debugfs initialization failed\n"); 18009021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_SUCCEED; 18012b281117SSeth Jennings return 0; 1802f1c54846SDan Streetman 1803bf9b7df2SChengming Zhou lru_fail: 1804bf9b7df2SChengming Zhou shrinker_free(zswap.shrinker); 1805bf9b7df2SChengming Zhou shrinker_fail: 1806bf9b7df2SChengming Zhou destroy_workqueue(shrink_wq); 1807bf9b7df2SChengming Zhou shrink_wq_fail: 1808bf9b7df2SChengming Zhou cpuhp_remove_multi_state(CPUHP_MM_ZSWP_POOL_PREPARE); 1809cab7a7e5SSebastian Andrzej Siewior hp_fail: 1810b7919122SLiu Shixin kmem_cache_destroy(zswap_entry_cache); 1811f1c54846SDan Streetman cache_fail: 1812d7b028f5SDan Streetman /* if built-in, we aren't unloaded on failure; don't allow use */ 18139021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_FAILED; 1814d7b028f5SDan Streetman zswap_enabled = false; 18152b281117SSeth Jennings return -ENOMEM; 18162b281117SSeth Jennings } 1817141fdeecSLiu Shixin 1818141fdeecSLiu Shixin static int __init zswap_init(void) 1819141fdeecSLiu Shixin { 1820141fdeecSLiu Shixin if (!zswap_enabled) 1821141fdeecSLiu Shixin return 0; 1822141fdeecSLiu Shixin return zswap_setup(); 1823141fdeecSLiu Shixin } 18242b281117SSeth Jennings /* must be late so crypto has time to come up */ 1825141fdeecSLiu Shixin late_initcall(zswap_init); 18262b281117SSeth Jennings 182768386da8SSeth Jennings MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>"); 18282b281117SSeth Jennings MODULE_DESCRIPTION("Compressed cache for swap pages"); 1829