1c942fddfSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 22b281117SSeth Jennings /* 32b281117SSeth Jennings * zswap.c - zswap driver file 42b281117SSeth Jennings * 542c06a0eSJohannes Weiner * zswap is a cache that takes pages that are in the process 62b281117SSeth Jennings * of being swapped out and attempts to compress and store them in a 72b281117SSeth Jennings * RAM-based memory pool. This can result in a significant I/O reduction on 82b281117SSeth Jennings * the swap device and, in the case where decompressing from RAM is faster 92b281117SSeth Jennings * than reading from the swap device, can also improve workload performance. 102b281117SSeth Jennings * 112b281117SSeth Jennings * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> 122b281117SSeth Jennings */ 132b281117SSeth Jennings 142b281117SSeth Jennings #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 152b281117SSeth Jennings 162b281117SSeth Jennings #include <linux/module.h> 172b281117SSeth Jennings #include <linux/cpu.h> 182b281117SSeth Jennings #include <linux/highmem.h> 192b281117SSeth Jennings #include <linux/slab.h> 202b281117SSeth Jennings #include <linux/spinlock.h> 212b281117SSeth Jennings #include <linux/types.h> 222b281117SSeth Jennings #include <linux/atomic.h> 232b281117SSeth Jennings #include <linux/rbtree.h> 242b281117SSeth Jennings #include <linux/swap.h> 252b281117SSeth Jennings #include <linux/crypto.h> 261ec3b5feSBarry Song #include <linux/scatterlist.h> 27ddc1a5cbSHugh Dickins #include <linux/mempolicy.h> 282b281117SSeth Jennings #include <linux/mempool.h> 2912d79d64SDan Streetman #include <linux/zpool.h> 301ec3b5feSBarry Song #include <crypto/acompress.h> 3142c06a0eSJohannes Weiner #include <linux/zswap.h> 322b281117SSeth Jennings #include <linux/mm_types.h> 332b281117SSeth Jennings #include <linux/page-flags.h> 342b281117SSeth Jennings #include <linux/swapops.h> 352b281117SSeth Jennings #include <linux/writeback.h> 362b281117SSeth Jennings #include <linux/pagemap.h> 
#include <linux/workqueue.h>
#include <linux/list_lru.h>

#include "swap.h"
#include "internal.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be a 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Store failed due to compression algorithm failure */
static u64 zswap_reject_compress_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

/* Sentinel for a compressor/zpool param that failed to initialize */
#define ZSWAP_PARAM_UNSET ""

static int zswap_setup(void);

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32

/* Enable/disable memory pressure-based shrinker. */
static bool zswap_shrinker_enabled = IS_ENABLED(
		CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);

/* Non-static accessor so other subsystems can query the enable state */
bool is_zswap_enabled(void)
{
	return zswap_enabled;
}

/*********************************
* data structures
**********************************/

/* Per-CPU compression context (one acomp transform + request per CPU) */
struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *buffer;
	struct mutex mutex;
};

/*
 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
 * The only case where lru_lock is not acquired while holding tree.lock is
 * when a zswap_entry is taken off the lru for writeback, in that case it
 * needs to be verified that it's still valid in the tree.
 */
struct zswap_pool {
	struct zpool *zpools[ZSWAP_NR_ZPOOLS];
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
	struct list_lru list_lru;
	struct mem_cgroup *next_shrink;
	struct shrinker *shrinker;
	atomic_t nr_stored;
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * swpentry - associated swap entry, the offset indexes into the red-black tree
 * refcount - the number of outstanding reference to the entry. This is needed
 *            to protect against premature freeing of the entry by code
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression. For a same value filled page length is 0, and both
 *          pool and lru are invalid and must be ignored.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - value of the same-value filled pages which have same content
 * objcg - the obj_cgroup that the compressed memory is charged to
 * lru - handle to the pool's lru used to evict pages.
 */
struct zswap_entry {
	struct rb_node rbnode;
	swp_entry_t swpentry;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		/* handle and value overlay: length == 0 selects value */
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
	struct list_head lru;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

/* Per-swap-type arrays of trees; each swapfile is sharded into several trees */
static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
static unsigned int nr_zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

enum zswap_init_type {
	ZSWAP_UNINIT,
	ZSWAP_INIT_SUCCEED,
	ZSWAP_INIT_FAILED
};

static enum zswap_init_type zswap_init_state;

/* used to ensure the integrity of initialization */
static DEFINE_MUTEX(zswap_init_lock);

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

/*
 * Map a swap entry to its zswap_tree: indexed by swap type, then sharded
 * by swap offset (offset >> SWAP_ADDRESS_SPACE_SHIFT).
 */
static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
{
	return &zswap_trees[swp_type(swp)][swp_offset(swp)
		>> SWAP_ADDRESS_SPACE_SHIFT];
}

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpools[0]))

/* true if the compressed pool exceeds zswap_max_pool_percent of RAM */
static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
/*
 * After the pool limit was hit, only accept new stores again once the
 * pool has shrunk below accept_threshold_percent of the max pool size.
 */
static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

/* Sum the sizes of all zpools backing @pool */
static u64 get_zswap_pool_size(struct zswap_pool *pool)
{
	u64 pool_size = 0;
	int i;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		pool_size += zpool_get_total_size(pool->zpools[i]);

	return pool_size;
}

/* Recompute zswap_pool_total_size over all pools (RCU list walk) */
static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += get_zswap_pool_size(pool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* pool functions
**********************************/

static void zswap_alloc_shrinker(struct zswap_pool *pool);
static void shrink_worker(struct work_struct *w);

/*
 * Allocate and initialize a zswap_pool for the given zpool @type and
 * @compressor name.  Returns NULL on any failure, after undoing partial
 * initialization.  The returned pool carries one kref, which the caller
 * is expected to hand over by making it the current pool.
 */
static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	int i;
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either are unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
		/* unique name for each pool specifically required by zsmalloc */
		snprintf(name, 38, "zswap%x",
			 atomic_inc_return(&zswap_pools_count));

		pool->zpools[i] = zpool_create_pool(type, name, gfp);
		if (!pool->zpools[i]) {
			pr_err("%s zpool not available\n", type);
			goto error;
		}
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;

	zswap_alloc_shrinker(pool);
	if (!pool->shrinker)
		goto error;

	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
		goto lru_fail;
	shrinker_register(pool->shrinker);
	INIT_WORK(&pool->shrink_work, shrink_worker);
	atomic_set(&pool->nr_stored, 0);

	zswap_pool_debug("created", pool);

	return pool;

lru_fail:
	list_lru_destroy(&pool->list_lru);
	shrinker_free(pool->shrinker);
error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	/* i zpools were created before the failure; destroy them in reverse */
	while (i--)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
	return NULL;
}

/*
 * Create a pool from the configured params, falling back to the Kconfig
 * defaults when the configured compressor or zpool is unavailable.  Params
 * that cannot be satisfied at all are reset to ZSWAP_PARAM_UNSET.
 */
static struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

/* Free @pool and every resource it owns.  Caller must hold the last ref. */
static void zswap_pool_destroy(struct zswap_pool *pool)
{
	int i;

	zswap_pool_debug("destroying", pool);

	shrinker_free(pool->shrinker);
	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	list_lru_destroy(&pool->list_lru);

	spin_lock(&zswap_pools_lock);
	mem_cgroup_iter_break(NULL, pool->next_shrink);
	pool->next_shrink = NULL;
	spin_unlock(&zswap_pools_lock);

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
}

/*
 * Deferred teardown scheduled by __zswap_pool_empty().  Waits for RCU
 * readers that may still see the pool on the (already list_del_rcu'd)
 * zswap_pools list before destroying it.
 */
static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static struct zswap_pool *zswap_pool_current(void);

/*
 * kref release callback: runs when the last reference is dropped.
 * Unlinks the pool under zswap_pools_lock and defers the actual
 * destruction to a workqueue (we may be in atomic context here).
 */
static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	/* the current pool always holds a ref, so it can never reach zero */
	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

/* Take a reference on @pool; returns 0 if @pool is NULL or already dying */
static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}

/* Drop a reference; the last put triggers __zswap_pool_empty() */
static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

/*
 * Return the current (first) pool without taking a reference.
 * Caller must be in an RCU read section or hold zswap_pools_lock.
 */
static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

/* Like zswap_pool_current(), but takes a reference; may return NULL */
static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

/* Return a reference to the last (oldest) pool on the list, or NULL */
static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;
Weiner 557c1a0ecb8SJohannes Weiner rcu_read_unlock(); 558c1a0ecb8SJohannes Weiner 559c1a0ecb8SJohannes Weiner return last; 560c1a0ecb8SJohannes Weiner } 561c1a0ecb8SJohannes Weiner 562c1a0ecb8SJohannes Weiner /* type and compressor must be null-terminated */ 563c1a0ecb8SJohannes Weiner static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) 564c1a0ecb8SJohannes Weiner { 565c1a0ecb8SJohannes Weiner struct zswap_pool *pool; 566c1a0ecb8SJohannes Weiner 567c1a0ecb8SJohannes Weiner assert_spin_locked(&zswap_pools_lock); 568c1a0ecb8SJohannes Weiner 569c1a0ecb8SJohannes Weiner list_for_each_entry_rcu(pool, &zswap_pools, list) { 570c1a0ecb8SJohannes Weiner if (strcmp(pool->tfm_name, compressor)) 571c1a0ecb8SJohannes Weiner continue; 572c1a0ecb8SJohannes Weiner /* all zpools share the same type */ 573c1a0ecb8SJohannes Weiner if (strcmp(zpool_get_type(pool->zpools[0]), type)) 574c1a0ecb8SJohannes Weiner continue; 575c1a0ecb8SJohannes Weiner /* if we can't get it, it's about to be destroyed */ 576c1a0ecb8SJohannes Weiner if (!zswap_pool_get(pool)) 577c1a0ecb8SJohannes Weiner continue; 578c1a0ecb8SJohannes Weiner return pool; 579c1a0ecb8SJohannes Weiner } 580c1a0ecb8SJohannes Weiner 581c1a0ecb8SJohannes Weiner return NULL; 582c1a0ecb8SJohannes Weiner } 583c1a0ecb8SJohannes Weiner 584abca07c0SJohannes Weiner /********************************* 585abca07c0SJohannes Weiner * param callbacks 586abca07c0SJohannes Weiner **********************************/ 587abca07c0SJohannes Weiner 588abca07c0SJohannes Weiner static bool zswap_pool_changed(const char *s, const struct kernel_param *kp) 589abca07c0SJohannes Weiner { 590abca07c0SJohannes Weiner /* no change required */ 591abca07c0SJohannes Weiner if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool) 592abca07c0SJohannes Weiner return false; 593abca07c0SJohannes Weiner return true; 594abca07c0SJohannes Weiner } 595abca07c0SJohannes Weiner 596abca07c0SJohannes Weiner /* val must be a null-terminated string */ 
597abca07c0SJohannes Weiner static int __zswap_param_set(const char *val, const struct kernel_param *kp, 598abca07c0SJohannes Weiner char *type, char *compressor) 599abca07c0SJohannes Weiner { 600abca07c0SJohannes Weiner struct zswap_pool *pool, *put_pool = NULL; 601abca07c0SJohannes Weiner char *s = strstrip((char *)val); 602abca07c0SJohannes Weiner int ret = 0; 603abca07c0SJohannes Weiner bool new_pool = false; 604abca07c0SJohannes Weiner 605abca07c0SJohannes Weiner mutex_lock(&zswap_init_lock); 606abca07c0SJohannes Weiner switch (zswap_init_state) { 607abca07c0SJohannes Weiner case ZSWAP_UNINIT: 608abca07c0SJohannes Weiner /* if this is load-time (pre-init) param setting, 609abca07c0SJohannes Weiner * don't create a pool; that's done during init. 610abca07c0SJohannes Weiner */ 611abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 612abca07c0SJohannes Weiner break; 613abca07c0SJohannes Weiner case ZSWAP_INIT_SUCCEED: 614abca07c0SJohannes Weiner new_pool = zswap_pool_changed(s, kp); 615abca07c0SJohannes Weiner break; 616abca07c0SJohannes Weiner case ZSWAP_INIT_FAILED: 617abca07c0SJohannes Weiner pr_err("can't set param, initialization failed\n"); 618abca07c0SJohannes Weiner ret = -ENODEV; 619abca07c0SJohannes Weiner } 620abca07c0SJohannes Weiner mutex_unlock(&zswap_init_lock); 621abca07c0SJohannes Weiner 622abca07c0SJohannes Weiner /* no need to create a new pool, return directly */ 623abca07c0SJohannes Weiner if (!new_pool) 624abca07c0SJohannes Weiner return ret; 625abca07c0SJohannes Weiner 626abca07c0SJohannes Weiner if (!type) { 627abca07c0SJohannes Weiner if (!zpool_has_pool(s)) { 628abca07c0SJohannes Weiner pr_err("zpool %s not available\n", s); 629abca07c0SJohannes Weiner return -ENOENT; 630abca07c0SJohannes Weiner } 631abca07c0SJohannes Weiner type = s; 632abca07c0SJohannes Weiner } else if (!compressor) { 633abca07c0SJohannes Weiner if (!crypto_has_acomp(s, 0, 0)) { 634abca07c0SJohannes Weiner pr_err("compressor %s not available\n", s); 
635abca07c0SJohannes Weiner return -ENOENT; 636abca07c0SJohannes Weiner } 637abca07c0SJohannes Weiner compressor = s; 638abca07c0SJohannes Weiner } else { 639abca07c0SJohannes Weiner WARN_ON(1); 640abca07c0SJohannes Weiner return -EINVAL; 641abca07c0SJohannes Weiner } 642abca07c0SJohannes Weiner 643abca07c0SJohannes Weiner spin_lock(&zswap_pools_lock); 644abca07c0SJohannes Weiner 645abca07c0SJohannes Weiner pool = zswap_pool_find_get(type, compressor); 646abca07c0SJohannes Weiner if (pool) { 647abca07c0SJohannes Weiner zswap_pool_debug("using existing", pool); 648abca07c0SJohannes Weiner WARN_ON(pool == zswap_pool_current()); 649abca07c0SJohannes Weiner list_del_rcu(&pool->list); 650abca07c0SJohannes Weiner } 651abca07c0SJohannes Weiner 652abca07c0SJohannes Weiner spin_unlock(&zswap_pools_lock); 653abca07c0SJohannes Weiner 654abca07c0SJohannes Weiner if (!pool) 655abca07c0SJohannes Weiner pool = zswap_pool_create(type, compressor); 656abca07c0SJohannes Weiner 657abca07c0SJohannes Weiner if (pool) 658abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 659abca07c0SJohannes Weiner else 660abca07c0SJohannes Weiner ret = -EINVAL; 661abca07c0SJohannes Weiner 662abca07c0SJohannes Weiner spin_lock(&zswap_pools_lock); 663abca07c0SJohannes Weiner 664abca07c0SJohannes Weiner if (!ret) { 665abca07c0SJohannes Weiner put_pool = zswap_pool_current(); 666abca07c0SJohannes Weiner list_add_rcu(&pool->list, &zswap_pools); 667abca07c0SJohannes Weiner zswap_has_pool = true; 668abca07c0SJohannes Weiner } else if (pool) { 669abca07c0SJohannes Weiner /* add the possibly pre-existing pool to the end of the pools 670abca07c0SJohannes Weiner * list; if it's new (and empty) then it'll be removed and 671abca07c0SJohannes Weiner * destroyed by the put after we drop the lock 672abca07c0SJohannes Weiner */ 673abca07c0SJohannes Weiner list_add_tail_rcu(&pool->list, &zswap_pools); 674abca07c0SJohannes Weiner put_pool = pool; 675abca07c0SJohannes Weiner } 676abca07c0SJohannes Weiner 
677abca07c0SJohannes Weiner spin_unlock(&zswap_pools_lock); 678abca07c0SJohannes Weiner 679abca07c0SJohannes Weiner if (!zswap_has_pool && !pool) { 680abca07c0SJohannes Weiner /* if initial pool creation failed, and this pool creation also 681abca07c0SJohannes Weiner * failed, maybe both compressor and zpool params were bad. 682abca07c0SJohannes Weiner * Allow changing this param, so pool creation will succeed 683abca07c0SJohannes Weiner * when the other param is changed. We already verified this 684abca07c0SJohannes Weiner * param is ok in the zpool_has_pool() or crypto_has_acomp() 685abca07c0SJohannes Weiner * checks above. 686abca07c0SJohannes Weiner */ 687abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 688abca07c0SJohannes Weiner } 689abca07c0SJohannes Weiner 690abca07c0SJohannes Weiner /* drop the ref from either the old current pool, 691abca07c0SJohannes Weiner * or the new pool we failed to add 692abca07c0SJohannes Weiner */ 693abca07c0SJohannes Weiner if (put_pool) 694abca07c0SJohannes Weiner zswap_pool_put(put_pool); 695abca07c0SJohannes Weiner 696abca07c0SJohannes Weiner return ret; 697abca07c0SJohannes Weiner } 698abca07c0SJohannes Weiner 699abca07c0SJohannes Weiner static int zswap_compressor_param_set(const char *val, 700abca07c0SJohannes Weiner const struct kernel_param *kp) 701abca07c0SJohannes Weiner { 702abca07c0SJohannes Weiner return __zswap_param_set(val, kp, zswap_zpool_type, NULL); 703abca07c0SJohannes Weiner } 704abca07c0SJohannes Weiner 705abca07c0SJohannes Weiner static int zswap_zpool_param_set(const char *val, 706abca07c0SJohannes Weiner const struct kernel_param *kp) 707abca07c0SJohannes Weiner { 708abca07c0SJohannes Weiner return __zswap_param_set(val, kp, NULL, zswap_compressor); 709abca07c0SJohannes Weiner } 710abca07c0SJohannes Weiner 711abca07c0SJohannes Weiner static int zswap_enabled_param_set(const char *val, 712abca07c0SJohannes Weiner const struct kernel_param *kp) 713abca07c0SJohannes Weiner { 714abca07c0SJohannes 
Weiner int ret = -ENODEV; 715abca07c0SJohannes Weiner 716abca07c0SJohannes Weiner /* if this is load-time (pre-init) param setting, only set param. */ 717abca07c0SJohannes Weiner if (system_state != SYSTEM_RUNNING) 718abca07c0SJohannes Weiner return param_set_bool(val, kp); 719abca07c0SJohannes Weiner 720abca07c0SJohannes Weiner mutex_lock(&zswap_init_lock); 721abca07c0SJohannes Weiner switch (zswap_init_state) { 722abca07c0SJohannes Weiner case ZSWAP_UNINIT: 723abca07c0SJohannes Weiner if (zswap_setup()) 724abca07c0SJohannes Weiner break; 725abca07c0SJohannes Weiner fallthrough; 726abca07c0SJohannes Weiner case ZSWAP_INIT_SUCCEED: 727abca07c0SJohannes Weiner if (!zswap_has_pool) 728abca07c0SJohannes Weiner pr_err("can't enable, no pool configured\n"); 729abca07c0SJohannes Weiner else 730abca07c0SJohannes Weiner ret = param_set_bool(val, kp); 731abca07c0SJohannes Weiner break; 732abca07c0SJohannes Weiner case ZSWAP_INIT_FAILED: 733abca07c0SJohannes Weiner pr_err("can't enable, initialization failed\n"); 734abca07c0SJohannes Weiner } 735abca07c0SJohannes Weiner mutex_unlock(&zswap_init_lock); 736abca07c0SJohannes Weiner 737abca07c0SJohannes Weiner return ret; 738abca07c0SJohannes Weiner } 739abca07c0SJohannes Weiner 740506a86c5SJohannes Weiner /********************************* 741506a86c5SJohannes Weiner * lru functions 742506a86c5SJohannes Weiner **********************************/ 743506a86c5SJohannes Weiner 744a65b0e76SDomenico Cerasuolo /* should be called under RCU */ 745a65b0e76SDomenico Cerasuolo #ifdef CONFIG_MEMCG 746a65b0e76SDomenico Cerasuolo static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) 747a65b0e76SDomenico Cerasuolo { 748a65b0e76SDomenico Cerasuolo return entry->objcg ? 
obj_cgroup_memcg(entry->objcg) : NULL; 749a65b0e76SDomenico Cerasuolo } 750a65b0e76SDomenico Cerasuolo #else 751a65b0e76SDomenico Cerasuolo static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) 752a65b0e76SDomenico Cerasuolo { 753a65b0e76SDomenico Cerasuolo return NULL; 754a65b0e76SDomenico Cerasuolo } 755a65b0e76SDomenico Cerasuolo #endif 756a65b0e76SDomenico Cerasuolo 757a65b0e76SDomenico Cerasuolo static inline int entry_to_nid(struct zswap_entry *entry) 758a65b0e76SDomenico Cerasuolo { 759a65b0e76SDomenico Cerasuolo return page_to_nid(virt_to_page(entry)); 760a65b0e76SDomenico Cerasuolo } 761a65b0e76SDomenico Cerasuolo 762a65b0e76SDomenico Cerasuolo static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry) 763a65b0e76SDomenico Cerasuolo { 764b5ba474fSNhat Pham atomic_long_t *nr_zswap_protected; 765b5ba474fSNhat Pham unsigned long lru_size, old, new; 766a65b0e76SDomenico Cerasuolo int nid = entry_to_nid(entry); 767a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 768b5ba474fSNhat Pham struct lruvec *lruvec; 769a65b0e76SDomenico Cerasuolo 770a65b0e76SDomenico Cerasuolo /* 771a65b0e76SDomenico Cerasuolo * Note that it is safe to use rcu_read_lock() here, even in the face of 772a65b0e76SDomenico Cerasuolo * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection 773a65b0e76SDomenico Cerasuolo * used in list_lru lookup, only two scenarios are possible: 774a65b0e76SDomenico Cerasuolo * 775a65b0e76SDomenico Cerasuolo * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The 776a65b0e76SDomenico Cerasuolo * new entry will be reparented to memcg's parent's list_lru. 777a65b0e76SDomenico Cerasuolo * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The 778a65b0e76SDomenico Cerasuolo * new entry will be added directly to memcg's parent's list_lru. 779a65b0e76SDomenico Cerasuolo * 7803f798aa6SChengming Zhou * Similar reasoning holds for list_lru_del(). 
781a65b0e76SDomenico Cerasuolo */ 782a65b0e76SDomenico Cerasuolo rcu_read_lock(); 783a65b0e76SDomenico Cerasuolo memcg = mem_cgroup_from_entry(entry); 784a65b0e76SDomenico Cerasuolo /* will always succeed */ 785a65b0e76SDomenico Cerasuolo list_lru_add(list_lru, &entry->lru, nid, memcg); 786b5ba474fSNhat Pham 787b5ba474fSNhat Pham /* Update the protection area */ 788b5ba474fSNhat Pham lru_size = list_lru_count_one(list_lru, nid, memcg); 789b5ba474fSNhat Pham lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); 790b5ba474fSNhat Pham nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected; 791b5ba474fSNhat Pham old = atomic_long_inc_return(nr_zswap_protected); 792b5ba474fSNhat Pham /* 793b5ba474fSNhat Pham * Decay to avoid overflow and adapt to changing workloads. 794b5ba474fSNhat Pham * This is based on LRU reclaim cost decaying heuristics. 795b5ba474fSNhat Pham */ 796b5ba474fSNhat Pham do { 797b5ba474fSNhat Pham new = old > lru_size / 4 ? old / 2 : old; 798b5ba474fSNhat Pham } while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new)); 799a65b0e76SDomenico Cerasuolo rcu_read_unlock(); 800a65b0e76SDomenico Cerasuolo } 801a65b0e76SDomenico Cerasuolo 802a65b0e76SDomenico Cerasuolo static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry) 803a65b0e76SDomenico Cerasuolo { 804a65b0e76SDomenico Cerasuolo int nid = entry_to_nid(entry); 805a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 806a65b0e76SDomenico Cerasuolo 807a65b0e76SDomenico Cerasuolo rcu_read_lock(); 808a65b0e76SDomenico Cerasuolo memcg = mem_cgroup_from_entry(entry); 809a65b0e76SDomenico Cerasuolo /* will always succeed */ 810a65b0e76SDomenico Cerasuolo list_lru_del(list_lru, &entry->lru, nid, memcg); 811a65b0e76SDomenico Cerasuolo rcu_read_unlock(); 812a65b0e76SDomenico Cerasuolo } 813a65b0e76SDomenico Cerasuolo 8145182661aSJohannes Weiner void zswap_lruvec_state_init(struct lruvec *lruvec) 8155182661aSJohannes Weiner { 8165182661aSJohannes Weiner 
atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0); 8175182661aSJohannes Weiner } 8185182661aSJohannes Weiner 8195182661aSJohannes Weiner void zswap_folio_swapin(struct folio *folio) 8205182661aSJohannes Weiner { 8215182661aSJohannes Weiner struct lruvec *lruvec; 8225182661aSJohannes Weiner 8235182661aSJohannes Weiner if (folio) { 8245182661aSJohannes Weiner lruvec = folio_lruvec(folio); 8255182661aSJohannes Weiner atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); 8265182661aSJohannes Weiner } 8275182661aSJohannes Weiner } 8285182661aSJohannes Weiner 8295182661aSJohannes Weiner void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) 8305182661aSJohannes Weiner { 8315182661aSJohannes Weiner struct zswap_pool *pool; 8325182661aSJohannes Weiner 8335182661aSJohannes Weiner /* lock out zswap pools list modification */ 8345182661aSJohannes Weiner spin_lock(&zswap_pools_lock); 8355182661aSJohannes Weiner list_for_each_entry(pool, &zswap_pools, list) { 8365182661aSJohannes Weiner if (pool->next_shrink == memcg) 8375182661aSJohannes Weiner pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL); 8385182661aSJohannes Weiner } 8395182661aSJohannes Weiner spin_unlock(&zswap_pools_lock); 8405182661aSJohannes Weiner } 8415182661aSJohannes Weiner 8425182661aSJohannes Weiner /********************************* 8432b281117SSeth Jennings * rbtree functions 8442b281117SSeth Jennings **********************************/ 8452b281117SSeth Jennings static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset) 8462b281117SSeth Jennings { 8472b281117SSeth Jennings struct rb_node *node = root->rb_node; 8482b281117SSeth Jennings struct zswap_entry *entry; 8490bb48849SDomenico Cerasuolo pgoff_t entry_offset; 8502b281117SSeth Jennings 8512b281117SSeth Jennings while (node) { 8522b281117SSeth Jennings entry = rb_entry(node, struct zswap_entry, rbnode); 8530bb48849SDomenico Cerasuolo entry_offset = swp_offset(entry->swpentry); 
8540bb48849SDomenico Cerasuolo if (entry_offset > offset) 8552b281117SSeth Jennings node = node->rb_left; 8560bb48849SDomenico Cerasuolo else if (entry_offset < offset) 8572b281117SSeth Jennings node = node->rb_right; 8582b281117SSeth Jennings else 8592b281117SSeth Jennings return entry; 8602b281117SSeth Jennings } 8612b281117SSeth Jennings return NULL; 8622b281117SSeth Jennings } 8632b281117SSeth Jennings 8642b281117SSeth Jennings /* 8652b281117SSeth Jennings * In the case that a entry with the same offset is found, a pointer to 8662b281117SSeth Jennings * the existing entry is stored in dupentry and the function returns -EEXIST 8672b281117SSeth Jennings */ 8682b281117SSeth Jennings static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry, 8692b281117SSeth Jennings struct zswap_entry **dupentry) 8702b281117SSeth Jennings { 8712b281117SSeth Jennings struct rb_node **link = &root->rb_node, *parent = NULL; 8722b281117SSeth Jennings struct zswap_entry *myentry; 8730bb48849SDomenico Cerasuolo pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry); 8742b281117SSeth Jennings 8752b281117SSeth Jennings while (*link) { 8762b281117SSeth Jennings parent = *link; 8772b281117SSeth Jennings myentry = rb_entry(parent, struct zswap_entry, rbnode); 8780bb48849SDomenico Cerasuolo myentry_offset = swp_offset(myentry->swpentry); 8790bb48849SDomenico Cerasuolo if (myentry_offset > entry_offset) 8802b281117SSeth Jennings link = &(*link)->rb_left; 8810bb48849SDomenico Cerasuolo else if (myentry_offset < entry_offset) 8822b281117SSeth Jennings link = &(*link)->rb_right; 8832b281117SSeth Jennings else { 8842b281117SSeth Jennings *dupentry = myentry; 8852b281117SSeth Jennings return -EEXIST; 8862b281117SSeth Jennings } 8872b281117SSeth Jennings } 8882b281117SSeth Jennings rb_link_node(&entry->rbnode, parent, link); 8892b281117SSeth Jennings rb_insert_color(&entry->rbnode, root); 8902b281117SSeth Jennings return 0; 8912b281117SSeth Jennings } 8922b281117SSeth 
Jennings 89318a93707SYosry Ahmed static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) 8940ab0abcfSWeijie Yang { 8950ab0abcfSWeijie Yang if (!RB_EMPTY_NODE(&entry->rbnode)) { 8960ab0abcfSWeijie Yang rb_erase(&entry->rbnode, root); 8970ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 89818a93707SYosry Ahmed return true; 8990ab0abcfSWeijie Yang } 90018a93707SYosry Ahmed return false; 9010ab0abcfSWeijie Yang } 9020ab0abcfSWeijie Yang 90336034bf6SJohannes Weiner /********************************* 90436034bf6SJohannes Weiner * zswap entry functions 90536034bf6SJohannes Weiner **********************************/ 90636034bf6SJohannes Weiner static struct kmem_cache *zswap_entry_cache; 90736034bf6SJohannes Weiner 90836034bf6SJohannes Weiner static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid) 90936034bf6SJohannes Weiner { 91036034bf6SJohannes Weiner struct zswap_entry *entry; 91136034bf6SJohannes Weiner entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid); 91236034bf6SJohannes Weiner if (!entry) 91336034bf6SJohannes Weiner return NULL; 91436034bf6SJohannes Weiner entry->refcount = 1; 91536034bf6SJohannes Weiner RB_CLEAR_NODE(&entry->rbnode); 91636034bf6SJohannes Weiner return entry; 91736034bf6SJohannes Weiner } 91836034bf6SJohannes Weiner 91936034bf6SJohannes Weiner static void zswap_entry_cache_free(struct zswap_entry *entry) 92036034bf6SJohannes Weiner { 92136034bf6SJohannes Weiner kmem_cache_free(zswap_entry_cache, entry); 92236034bf6SJohannes Weiner } 92336034bf6SJohannes Weiner 924b8cf32dcSYosry Ahmed static struct zpool *zswap_find_zpool(struct zswap_entry *entry) 925b8cf32dcSYosry Ahmed { 926b8cf32dcSYosry Ahmed int i = 0; 927b8cf32dcSYosry Ahmed 928b8cf32dcSYosry Ahmed if (ZSWAP_NR_ZPOOLS > 1) 929b8cf32dcSYosry Ahmed i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS)); 930b8cf32dcSYosry Ahmed 931b8cf32dcSYosry Ahmed return entry->pool->zpools[i]; 932b8cf32dcSYosry Ahmed } 933b8cf32dcSYosry Ahmed 9340ab0abcfSWeijie Yang /* 
93512d79d64SDan Streetman * Carries out the common pattern of freeing and entry's zpool allocation, 9360ab0abcfSWeijie Yang * freeing the entry itself, and decrementing the number of stored pages. 9370ab0abcfSWeijie Yang */ 93842398be2SJohannes Weiner static void zswap_entry_free(struct zswap_entry *entry) 9390ab0abcfSWeijie Yang { 940a85f878bSSrividya Desireddy if (!entry->length) 941a85f878bSSrividya Desireddy atomic_dec(&zswap_same_filled_pages); 942a85f878bSSrividya Desireddy else { 943a65b0e76SDomenico Cerasuolo zswap_lru_del(&entry->pool->list_lru, entry); 944b8cf32dcSYosry Ahmed zpool_free(zswap_find_zpool(entry), entry->handle); 945b5ba474fSNhat Pham atomic_dec(&entry->pool->nr_stored); 946f1c54846SDan Streetman zswap_pool_put(entry->pool); 947a85f878bSSrividya Desireddy } 9482e601e1eSJohannes Weiner if (entry->objcg) { 9492e601e1eSJohannes Weiner obj_cgroup_uncharge_zswap(entry->objcg, entry->length); 9502e601e1eSJohannes Weiner obj_cgroup_put(entry->objcg); 9512e601e1eSJohannes Weiner } 9520ab0abcfSWeijie Yang zswap_entry_cache_free(entry); 9530ab0abcfSWeijie Yang atomic_dec(&zswap_stored_pages); 954f1c54846SDan Streetman zswap_update_total_size(); 9550ab0abcfSWeijie Yang } 9560ab0abcfSWeijie Yang 9570ab0abcfSWeijie Yang /* caller must hold the tree lock */ 9580ab0abcfSWeijie Yang static void zswap_entry_get(struct zswap_entry *entry) 9590ab0abcfSWeijie Yang { 960e477559cSJohannes Weiner WARN_ON_ONCE(!entry->refcount); 9610ab0abcfSWeijie Yang entry->refcount++; 9620ab0abcfSWeijie Yang } 9630ab0abcfSWeijie Yang 964dab7711fSJohannes Weiner /* caller must hold the tree lock */ 965db128f5fSYosry Ahmed static void zswap_entry_put(struct zswap_entry *entry) 9660ab0abcfSWeijie Yang { 967dab7711fSJohannes Weiner WARN_ON_ONCE(!entry->refcount); 968dab7711fSJohannes Weiner if (--entry->refcount == 0) { 96973108957SJohannes Weiner WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode)); 97042398be2SJohannes Weiner zswap_entry_free(entry); 9710ab0abcfSWeijie Yang } 
9720ab0abcfSWeijie Yang } 9730ab0abcfSWeijie Yang 9747dd1f7f0SJohannes Weiner /* 9757dd1f7f0SJohannes Weiner * If the entry is still valid in the tree, drop the initial ref and remove it 9767dd1f7f0SJohannes Weiner * from the tree. This function must be called with an additional ref held, 9777dd1f7f0SJohannes Weiner * otherwise it may race with another invalidation freeing the entry. 9787dd1f7f0SJohannes Weiner */ 9797dd1f7f0SJohannes Weiner static void zswap_invalidate_entry(struct zswap_tree *tree, 9807dd1f7f0SJohannes Weiner struct zswap_entry *entry) 9817dd1f7f0SJohannes Weiner { 9827dd1f7f0SJohannes Weiner if (zswap_rb_erase(&tree->rbroot, entry)) 9837dd1f7f0SJohannes Weiner zswap_entry_put(entry); 9847dd1f7f0SJohannes Weiner } 9857dd1f7f0SJohannes Weiner 9862b281117SSeth Jennings /********************************* 987f91e81d3SJohannes Weiner * compressed storage functions 988f91e81d3SJohannes Weiner **********************************/ 98964f200b8SJohannes Weiner static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) 99064f200b8SJohannes Weiner { 99164f200b8SJohannes Weiner struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); 99264f200b8SJohannes Weiner struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); 99364f200b8SJohannes Weiner struct crypto_acomp *acomp; 99464f200b8SJohannes Weiner struct acomp_req *req; 99564f200b8SJohannes Weiner int ret; 99664f200b8SJohannes Weiner 99764f200b8SJohannes Weiner mutex_init(&acomp_ctx->mutex); 99864f200b8SJohannes Weiner 99964f200b8SJohannes Weiner acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); 100064f200b8SJohannes Weiner if (!acomp_ctx->buffer) 100164f200b8SJohannes Weiner return -ENOMEM; 100264f200b8SJohannes Weiner 100364f200b8SJohannes Weiner acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); 100464f200b8SJohannes Weiner if (IS_ERR(acomp)) { 100564f200b8SJohannes Weiner pr_err("could not alloc crypto acomp 
%s : %ld\n", 100664f200b8SJohannes Weiner pool->tfm_name, PTR_ERR(acomp)); 100764f200b8SJohannes Weiner ret = PTR_ERR(acomp); 100864f200b8SJohannes Weiner goto acomp_fail; 100964f200b8SJohannes Weiner } 101064f200b8SJohannes Weiner acomp_ctx->acomp = acomp; 101164f200b8SJohannes Weiner 101264f200b8SJohannes Weiner req = acomp_request_alloc(acomp_ctx->acomp); 101364f200b8SJohannes Weiner if (!req) { 101464f200b8SJohannes Weiner pr_err("could not alloc crypto acomp_request %s\n", 101564f200b8SJohannes Weiner pool->tfm_name); 101664f200b8SJohannes Weiner ret = -ENOMEM; 101764f200b8SJohannes Weiner goto req_fail; 101864f200b8SJohannes Weiner } 101964f200b8SJohannes Weiner acomp_ctx->req = req; 102064f200b8SJohannes Weiner 102164f200b8SJohannes Weiner crypto_init_wait(&acomp_ctx->wait); 102264f200b8SJohannes Weiner /* 102364f200b8SJohannes Weiner * if the backend of acomp is async zip, crypto_req_done() will wakeup 102464f200b8SJohannes Weiner * crypto_wait_req(); if the backend of acomp is scomp, the callback 102564f200b8SJohannes Weiner * won't be called, crypto_wait_req() will return without blocking. 
102664f200b8SJohannes Weiner */ 102764f200b8SJohannes Weiner acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 102864f200b8SJohannes Weiner crypto_req_done, &acomp_ctx->wait); 102964f200b8SJohannes Weiner 103064f200b8SJohannes Weiner return 0; 103164f200b8SJohannes Weiner 103264f200b8SJohannes Weiner req_fail: 103364f200b8SJohannes Weiner crypto_free_acomp(acomp_ctx->acomp); 103464f200b8SJohannes Weiner acomp_fail: 103564f200b8SJohannes Weiner kfree(acomp_ctx->buffer); 103664f200b8SJohannes Weiner return ret; 103764f200b8SJohannes Weiner } 103864f200b8SJohannes Weiner 103964f200b8SJohannes Weiner static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node) 104064f200b8SJohannes Weiner { 104164f200b8SJohannes Weiner struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); 104264f200b8SJohannes Weiner struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); 104364f200b8SJohannes Weiner 104464f200b8SJohannes Weiner if (!IS_ERR_OR_NULL(acomp_ctx)) { 104564f200b8SJohannes Weiner if (!IS_ERR_OR_NULL(acomp_ctx->req)) 104664f200b8SJohannes Weiner acomp_request_free(acomp_ctx->req); 104764f200b8SJohannes Weiner if (!IS_ERR_OR_NULL(acomp_ctx->acomp)) 104864f200b8SJohannes Weiner crypto_free_acomp(acomp_ctx->acomp); 104964f200b8SJohannes Weiner kfree(acomp_ctx->buffer); 105064f200b8SJohannes Weiner } 105164f200b8SJohannes Weiner 105264f200b8SJohannes Weiner return 0; 105364f200b8SJohannes Weiner } 105464f200b8SJohannes Weiner 1055f91e81d3SJohannes Weiner static bool zswap_compress(struct folio *folio, struct zswap_entry *entry) 1056f91e81d3SJohannes Weiner { 1057f91e81d3SJohannes Weiner struct crypto_acomp_ctx *acomp_ctx; 1058f91e81d3SJohannes Weiner struct scatterlist input, output; 1059f91e81d3SJohannes Weiner unsigned int dlen = PAGE_SIZE; 1060f91e81d3SJohannes Weiner unsigned long handle; 1061f91e81d3SJohannes Weiner struct zpool *zpool; 1062f91e81d3SJohannes Weiner char *buf; 1063f91e81d3SJohannes Weiner gfp_t gfp; 
	int ret;
	u8 *dst;

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	/* Serialize use of this CPU's acomp request/buffer. */
	mutex_lock(&acomp_ctx->mutex);

	dst = acomp_ctx->buffer;
	sg_init_table(&input, 1);
	sg_set_page(&input, &folio->page, PAGE_SIZE, 0);

	/*
	 * We need PAGE_SIZE * 2 here since there may be an over-compression
	 * case, and hardware accelerators may not check the dst buffer size,
	 * so give the dst buffer enough length to avoid buffer overflow.
	 */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);

	/*
	 * It may look a little silly that we send an asynchronous request and
	 * then wait for its completion synchronously. This makes the process
	 * synchronous in fact.
	 * Theoretically, acomp supports users sending multiple acomp requests
	 * in one acomp instance, then getting those requests done
	 * simultaneously. But in this case, zswap actually does store and load
	 * page by page: there is no existing method to send the second page
	 * before the first page is done in one thread doing zswap.
	 * But in different threads running on different cpus, we have different
	 * acomp instances, so multiple threads can do (de)compression in
	 * parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;
	if (ret) {
		zswap_reject_compress_fail++;
		goto unlock;
	}

	/* Copy the compressed data into a freshly allocated zpool object. */
	zpool = zswap_find_zpool(entry);
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(zpool, dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		/* Compressed poorly enough that the pool rejected the size. */
		zswap_reject_compress_poor++;
		goto unlock;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto unlock;
	}

	buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, dst, dlen);
	zpool_unmap_handle(zpool, handle);

	entry->handle = handle;
	entry->length = dlen;

unlock:
	mutex_unlock(&acomp_ctx->mutex);
	return ret == 0;
}

/*
 * Decompress @entry's stored data into @page.
 * Uses this CPU's acomp context; bounces through acomp_ctx->buffer when the
 * zpool mapping cannot be held across a sleeping (de)compression.
 */
static void zswap_decompress(struct zswap_entry *entry, struct page *page)
{
	struct zpool *zpool = zswap_find_zpool(entry);
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src;

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(&acomp_ctx->mutex);

	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
	if (!zpool_can_sleep_mapped(zpool)) {
		/*
		 * Decompression may sleep; copy to the per-CPU buffer and
		 * drop the mapping first.
		 */
		memcpy(acomp_ctx->buffer, src, entry->length);
		src = acomp_ctx->buffer;
		zpool_unmap_handle(zpool, entry->handle);
	}

	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
	mutex_unlock(&acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(zpool))
		zpool_unmap_handle(zpool, entry->handle);
}

/*********************************
* writeback code
**********************************/
/*
 * Attempts to free an entry by adding a folio to the swap cache,
 * decompressing the entry data into the folio, and issuing a
 * bio write to write the folio back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the folio
 * to the swap device. We are basically resuming the same swap
 * writeback path that was intercepted with the zswap_store()
 * in the first place. After the folio has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zswap_entry *entry,
				 swp_entry_t swpentry)
{
	struct zswap_tree *tree;
	struct folio *folio;
	struct mempolicy *mpol;
	bool folio_was_allocated;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* try to allocate swap cache folio */
	mpol = get_task_policy(current);
	folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
				NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
	if (!folio)
		return -ENOMEM;

	/*
	 * Found an existing folio, we raced with swapin or concurrent
	 * shrinker. We generally writeback cold folios from zswap, and
	 * swapin means the folio just became hot, so skip this folio.
	 * For unlikely concurrent shrinker case, it will be unlinked
	 * and freed when invalidated by the concurrent shrinker anyway.
	 */
	if (!folio_was_allocated) {
		folio_put(folio);
		return -EEXIST;
	}

	/*
	 * folio is locked, and the swapcache is now secured against
	 * concurrent swapping to and from the slot, and concurrent
	 * swapoff so we can safely dereference the zswap tree here.
	 * Verify that the swap entry hasn't been invalidated and recycled
	 * behind our backs, to avoid overwriting a new swap folio with
	 * old compressed data. Only when this is successful can the entry
	 * be dereferenced.
	 */
	tree = swap_zswap_tree(swpentry);
	spin_lock(&tree->lock);
	if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
		spin_unlock(&tree->lock);
		delete_from_swap_cache(folio);
		folio_unlock(folio);
		folio_put(folio);
		return -ENOMEM;
	}

	/* Safe to deref entry after the entry is verified above. */
	zswap_entry_get(entry);
	spin_unlock(&tree->lock);

	zswap_decompress(entry, &folio->page);

	count_vm_event(ZSWPWB);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPWB);

	/* The compressed copy is now redundant; drop it and our reference. */
	spin_lock(&tree->lock);
	zswap_invalidate_entry(tree, entry);
	zswap_entry_put(entry);
	spin_unlock(&tree->lock);

	/* folio is up to date */
	folio_mark_uptodate(folio);

	/* move it to the tail of the inactive list after end_writeback */
	folio_set_reclaim(folio);

	/* start writeback */
	__swap_writepage(folio, &wbc);
	folio_put(folio);

	return 0;
}

/*********************************
* shrinker functions
**********************************/
/*
 * list_lru walk callback: try to write one zswap entry back to the
 * swap device. Called with the LRU lock held; may drop and reacquire it.
 */
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
				       spinlock_t *lock, void *arg)
{
	struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
	bool *encountered_page_in_swapcache = (bool *)arg;
	swp_entry_t swpentry;
	enum lru_status ret = LRU_REMOVED_RETRY;
	int writeback_result;

	/*
	 * As soon as we drop the LRU lock, the entry can be freed by
	 * a concurrent invalidation. This means the following:
	 *
	 * 1. We extract the swp_entry_t to the stack, allowing
	 *    zswap_writeback_entry() to pin the swap entry and
	 *    then validate the zswap entry against that swap entry's
	 *    tree using pointer value comparison. Only when that
	 *    is successful can the entry be dereferenced.
	 *
	 * 2. Usually, objects are taken off the LRU for reclaim. In
	 *    this case this isn't possible, because if reclaim fails
	 *    for whatever reason, we have no means of knowing if the
	 *    entry is alive to put it back on the LRU.
	 *
	 *    So rotate it before dropping the lock. If the entry is
	 *    written back or invalidated, the free path will unlink
	 *    it. For failures, rotation is the right thing as well.
	 *
	 * Temporary failures, where the same entry should be tried
	 * again immediately, almost never happen for this shrinker.
	 * We don't do any trylocking; -ENOMEM comes closest,
	 * but that's extremely rare and doesn't happen spuriously
	 * either. Don't bother distinguishing this case.
	 */
	list_move_tail(item, &l->list);

	/*
	 * Once the lru lock is dropped, the entry might get freed. The
	 * swpentry is copied to the stack, and entry isn't deref'd again
	 * until the entry is verified to still be alive in the tree.
	 */
	swpentry = entry->swpentry;

	/*
	 * It's safe to drop the lock here because we return either
	 * LRU_REMOVED_RETRY or LRU_RETRY.
	 */
	spin_unlock(lock);

	writeback_result = zswap_writeback_entry(entry, swpentry);

	if (writeback_result) {
		zswap_reject_reclaim_fail++;
		ret = LRU_RETRY;

		/*
		 * Encountering a page already in swap cache is a sign that we
		 * are shrinking into the warmer region. We should terminate
		 * shrinking (if we're in the dynamic shrinker context).
		 */
		if (writeback_result == -EEXIST && encountered_page_in_swapcache) {
			ret = LRU_STOP;
			*encountered_page_in_swapcache = true;
		}
	} else {
		zswap_written_back_pages++;
	}

	spin_lock(lock);
	return ret;
}

/*
 * Shrinker ->scan_objects callback: walk the pool's LRU and write entries
 * back, unless the walk would eat into the protected (warm) region.
 */
static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
		struct shrink_control *sc)
{
	struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
	unsigned long shrink_ret, nr_protected, lru_size;
	struct zswap_pool *pool = shrinker->private_data;
	bool encountered_page_in_swapcache = false;

	if (!zswap_shrinker_enabled ||
	    !mem_cgroup_zswap_writeback_enabled(sc->memcg)) {
		sc->nr_scanned = 0;
		return SHRINK_STOP;
	}

	nr_protected =
		atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
	lru_size = list_lru_shrink_count(&pool->list_lru, sc);

	/*
	 * Abort if we are shrinking into the protected region.
	 *
	 * This short-circuiting is necessary because if we have too many
	 * concurrent reclaimers getting the freeable zswap object counts at the
	 * same time (before any of them made reasonable progress), the total
	 * number of reclaimed objects might be more than the number of
	 * unprotected objects (i.e. the reclaimers will reclaim into the
	 * protected area of the zswap LRU).
	 */
	if (nr_protected >= lru_size - sc->nr_to_scan) {
		sc->nr_scanned = 0;
		return SHRINK_STOP;
	}

	shrink_ret = list_lru_shrink_walk(&pool->list_lru, sc, &shrink_memcg_cb,
		&encountered_page_in_swapcache);

	/* Hitting a swap-cached page means we reached the warm region; stop. */
	if (encountered_page_in_swapcache)
		return SHRINK_STOP;

	return shrink_ret ? shrink_ret : SHRINK_STOP;
}

/*
 * Shrinker ->count_objects callback: estimate how many pages writeback of
 * the unprotected part of this memcg's zswap LRU could free.
 */
static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
		struct shrink_control *sc)
{
	struct zswap_pool *pool = shrinker->private_data;
	struct mem_cgroup *memcg = sc->memcg;
	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid));
	unsigned long nr_backing, nr_stored, nr_freeable, nr_protected;

	if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
		return 0;

#ifdef CONFIG_MEMCG_KMEM
	mem_cgroup_flush_stats(memcg);
	nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
	nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
#else
	/* use pool stats instead of memcg stats */
	nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT;
	nr_stored = atomic_read(&pool->nr_stored);
#endif

	if (!nr_stored)
		return 0;

	nr_protected =
		atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
	nr_freeable = list_lru_shrink_count(&pool->list_lru, sc);
	/*
	 * Subtract the lru size by an estimate of the number of pages
	 * that should be protected.
	 */
	nr_freeable = nr_freeable > nr_protected ?
			nr_freeable - nr_protected : 0;

	/*
	 * Scale the number of freeable pages by the memory saving factor.
	 * This ensures that the better zswap compresses memory, the fewer
	 * pages we will evict to swap (as it will otherwise incur IO for
	 * relatively small memory saving).
	 */
	return mult_frac(nr_freeable, nr_backing, nr_stored);
}

/* Allocate and configure the pool's NUMA- and memcg-aware shrinker. */
static void zswap_alloc_shrinker(struct zswap_pool *pool)
{
	pool->shrinker =
		shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap");
	if (!pool->shrinker)
		return;

	pool->shrinker->private_data = pool;
	pool->shrinker->scan_objects = zswap_shrinker_scan;
	pool->shrinker->count_objects = zswap_shrinker_count;
	pool->shrinker->batch = 0;
	pool->shrinker->seeks = DEFAULT_SEEKS;
}

/*
 * Try to reclaim one zswap entry per node from @memcg's LRU in the current
 * pool. Returns 0 if anything was written back, -EAGAIN if nothing was,
 * or a negative error when reclaim is not possible for this memcg.
 */
static int shrink_memcg(struct mem_cgroup *memcg)
{
	struct zswap_pool *pool;
	int nid, shrunk = 0;

	if (!mem_cgroup_zswap_writeback_enabled(memcg))
		return -EINVAL;

	/*
	 * Skip zombies because their LRUs are reparented and we would be
	 * reclaiming from the parent instead of the dead memcg.
	 */
	if (memcg && !mem_cgroup_online(memcg))
		return -ENOENT;

	pool = zswap_pool_current_get();
	if (!pool)
		return -EINVAL;

	for_each_node_state(nid, N_NORMAL_MEMORY) {
		unsigned long nr_to_walk = 1;

		shrunk += list_lru_walk_one(&pool->list_lru, nid, memcg,
					    &shrink_memcg_cb, NULL, &nr_to_walk);
	}
	zswap_pool_put(pool);
	return shrunk ? 0 : -EAGAIN;
}

/*
 * Workqueue worker: reclaim from memcgs round-robin until the pool is back
 * under its acceptance threshold or too many failures accumulate.
 */
static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);
	struct mem_cgroup *memcg;
	int ret, failures = 0;

	/* global reclaim will select cgroup in a round-robin fashion. */
	do {
		spin_lock(&zswap_pools_lock);
		pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
		memcg = pool->next_shrink;

		/*
		 * We need to retry if we have gone through a full round trip,
		 * or if we got an offline memcg (or else we risk undoing the
		 * effect of the zswap memcg offlining cleanup callback).
		 * This is not catastrophic per se, but it will keep the now
		 * offlined memcg hostage for a while.
		 *
		 * Note that if we got an online memcg, we will keep the extra
		 * reference in case the original reference obtained by
		 * mem_cgroup_iter is dropped by the zswap memcg offlining
		 * callback, ensuring that the memcg is not killed when we
		 * are reclaiming.
		 */
		if (!memcg) {
			spin_unlock(&zswap_pools_lock);
			if (++failures == MAX_RECLAIM_RETRIES)
				break;

			goto resched;
		}

		if (!mem_cgroup_tryget_online(memcg)) {
			/* drop the reference from mem_cgroup_iter() */
			mem_cgroup_iter_break(NULL, memcg);
			pool->next_shrink = NULL;
			spin_unlock(&zswap_pools_lock);

			if (++failures == MAX_RECLAIM_RETRIES)
				break;

			goto resched;
		}
		spin_unlock(&zswap_pools_lock);

		ret = shrink_memcg(memcg);
		/* drop the extra reference */
		mem_cgroup_put(memcg);

		if (ret == -EINVAL)
			break;
		if (ret && ++failures == MAX_RECLAIM_RETRIES)
			break;

resched:
		cond_resched();
	} while (!zswap_can_accept());
	zswap_pool_put(pool);
}

/*
 * Check whether the PAGE_SIZE buffer at @ptr consists of a single repeated
 * word value. Returns 1 and stores that word in *@value if so, 0 otherwise.
 */
static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	/* Cheap early-out: first and last words must already match. */
	if (val != page[last_pos])
		return 0;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return 0;
	}

	*value = val;

	return 1;
}

/* Fill the PAGE_SIZE buffer at @ptr with the repeated word @value. */
static void zswap_fill_page(void *ptr, unsigned long value)
{
	unsigned long *page;

	page = (unsigned long *)ptr;
	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}

/*
 * Store @folio in zswap. Returns true on success; on failure the folio
 * proceeds down the normal swap writeback path.
 */
bool zswap_store(struct folio *folio)
{
	swp_entry_t swp = folio->swap;
	pgoff_t offset = swp_offset(swp);
	struct zswap_tree *tree = swap_zswap_tree(swp);
	struct zswap_entry *entry, *dupentry;
	struct obj_cgroup *objcg = NULL;
	struct mem_cgroup *memcg = NULL;
	struct zswap_pool *shrink_pool;

	VM_WARN_ON_ONCE(!folio_test_locked(folio));
	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));

	/* Large folios aren't supported */
	if (folio_test_large(folio))
		return false;

	/*
	 * If this is a duplicate, it must be removed before attempting to store
	 * it, otherwise, if the store fails the old page won't be removed from
	 * the tree, and it might be written back overriding the new data.
	 */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (entry)
		zswap_invalidate_entry(tree, entry);
	spin_unlock(&tree->lock);

	if (!zswap_enabled)
		return false;

	/* Enforce the cgroup zswap limit, trying to reclaim if it is hit. */
	objcg = get_obj_cgroup_from_folio(folio);
	if (objcg && !obj_cgroup_may_zswap(objcg)) {
		memcg = get_mem_cgroup_from_objcg(objcg);
		if (shrink_memcg(memcg)) {
			mem_cgroup_put(memcg);
			goto reject;
		}
		mem_cgroup_put(memcg);
	}

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		zswap_pool_reached_full = true;
		goto shrink;
	}

	if (zswap_pool_reached_full) {
		if (!zswap_can_accept())
			goto shrink;
		else
			zswap_pool_reached_full = false;
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		goto reject;
	}

	/* Same-value-filled pages are stored as just the fill value. */
	if (zswap_same_filled_pages_enabled) {
		unsigned long value;
		u8 *src;

		src = kmap_local_folio(folio, 0);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_local(src);
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_local(src);
	}

	if (!zswap_non_same_filled_pages_enabled)
		goto freepage;

	/* if entry is successfully added, it keeps the reference */
	entry->pool = zswap_pool_current_get();
	if (!entry->pool)
		goto freepage;

	if (objcg) {
		memcg = get_mem_cgroup_from_objcg(objcg);
		if (memcg_list_lru_alloc(memcg, &entry->pool->list_lru, GFP_KERNEL)) {
			mem_cgroup_put(memcg);
			goto put_pool;
		}
		mem_cgroup_put(memcg);
	}

	if (!zswap_compress(folio, entry))
		goto put_pool;

insert_entry:
	entry->swpentry = swp;
	entry->objcg = objcg;
	if (objcg) {
		obj_cgroup_charge_zswap(objcg, entry->length);
		/* Account before objcg ref is moved to tree */
		count_objcg_event(objcg, ZSWPOUT);
	}

	/* map */
	spin_lock(&tree->lock);
	/*
	 * A duplicate entry should have been removed at the beginning of this
	 * function. Since the swap entry should be pinned, if a duplicate is
	 * found again here it means that something went wrong in the swap
	 * cache.
	 */
	while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
		WARN_ON(1);
		zswap_invalidate_entry(tree, dupentry);
	}
	if (entry->length) {
		INIT_LIST_HEAD(&entry->lru);
		zswap_lru_add(&entry->pool->list_lru, entry);
		atomic_inc(&entry->pool->nr_stored);
	}
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_update_total_size();
	count_vm_event(ZSWPOUT);

	return true;

put_pool:
	zswap_pool_put(entry->pool);
freepage:
	zswap_entry_cache_free(entry);
reject:
	if (objcg)
		obj_cgroup_put(objcg);
	return false;

shrink:
	/* Kick the async shrink worker, then fail this store. */
	shrink_pool = zswap_pool_last_get();
	if (shrink_pool && !queue_work(shrink_wq, &shrink_pool->shrink_work))
		zswap_pool_put(shrink_pool);
	goto reject;
}

/*
 * Load @folio's data from zswap, if present. Returns true when the folio
 * was filled from zswap; the entry is then invalidated (load-exclusive).
 */
bool zswap_load(struct folio *folio)
{
	swp_entry_t swp = folio->swap;
	pgoff_t offset = swp_offset(swp);
	struct page *page = &folio->page;
	struct zswap_tree *tree = swap_zswap_tree(swp);
	struct zswap_entry *entry;
	u8 *dst;

	VM_WARN_ON_ONCE(!folio_test_locked(folio));

	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		spin_unlock(&tree->lock);
		return false;
	}
	/* Pin the entry across the unlocked decompression below. */
	zswap_entry_get(entry);
	spin_unlock(&tree->lock);

	if (entry->length)
		zswap_decompress(entry, page);
	else {
		/* Same-filled entry: just replay the stored word value. */
		dst = kmap_local_page(page);
		zswap_fill_page(dst, entry->value);
De Francesco kunmap_local(dst); 1715a85f878bSSrividya Desireddy } 1716a85f878bSSrividya Desireddy 1717f6498b77SJohannes Weiner count_vm_event(ZSWPIN); 1718f4840ccfSJohannes Weiner if (entry->objcg) 1719f4840ccfSJohannes Weiner count_objcg_event(entry->objcg, ZSWPIN); 1720c75f5c1eSChengming Zhou 17212b281117SSeth Jennings spin_lock(&tree->lock); 1722b9c91c43SYosry Ahmed zswap_invalidate_entry(tree, entry); 1723db128f5fSYosry Ahmed zswap_entry_put(entry); 17242b281117SSeth Jennings spin_unlock(&tree->lock); 17252b281117SSeth Jennings 1726*c2e2ba77SChengming Zhou folio_mark_dirty(folio); 1727*c2e2ba77SChengming Zhou 172866447fd0SChengming Zhou return true; 17292b281117SSeth Jennings } 17302b281117SSeth Jennings 17310827a1fbSChengming Zhou void zswap_invalidate(swp_entry_t swp) 17322b281117SSeth Jennings { 17330827a1fbSChengming Zhou pgoff_t offset = swp_offset(swp); 17340827a1fbSChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp); 17352b281117SSeth Jennings struct zswap_entry *entry; 17362b281117SSeth Jennings 17372b281117SSeth Jennings spin_lock(&tree->lock); 17382b281117SSeth Jennings entry = zswap_rb_search(&tree->rbroot, offset); 173906ed2289SJohannes Weiner if (entry) 1740b9c91c43SYosry Ahmed zswap_invalidate_entry(tree, entry); 17412b281117SSeth Jennings spin_unlock(&tree->lock); 17422b281117SSeth Jennings } 17432b281117SSeth Jennings 174444c7c734SChengming Zhou int zswap_swapon(int type, unsigned long nr_pages) 174542c06a0eSJohannes Weiner { 174644c7c734SChengming Zhou struct zswap_tree *trees, *tree; 174744c7c734SChengming Zhou unsigned int nr, i; 174842c06a0eSJohannes Weiner 174944c7c734SChengming Zhou nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); 175044c7c734SChengming Zhou trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL); 175144c7c734SChengming Zhou if (!trees) { 175242c06a0eSJohannes Weiner pr_err("alloc failed, zswap disabled for swap type %d\n", type); 1753bb29fd77SChengming Zhou return -ENOMEM; 175442c06a0eSJohannes Weiner } 
175542c06a0eSJohannes Weiner 175644c7c734SChengming Zhou for (i = 0; i < nr; i++) { 175744c7c734SChengming Zhou tree = trees + i; 175842c06a0eSJohannes Weiner tree->rbroot = RB_ROOT; 175942c06a0eSJohannes Weiner spin_lock_init(&tree->lock); 176044c7c734SChengming Zhou } 176144c7c734SChengming Zhou 176244c7c734SChengming Zhou nr_zswap_trees[type] = nr; 176344c7c734SChengming Zhou zswap_trees[type] = trees; 1764bb29fd77SChengming Zhou return 0; 176542c06a0eSJohannes Weiner } 176642c06a0eSJohannes Weiner 176742c06a0eSJohannes Weiner void zswap_swapoff(int type) 17682b281117SSeth Jennings { 176944c7c734SChengming Zhou struct zswap_tree *trees = zswap_trees[type]; 177044c7c734SChengming Zhou unsigned int i; 17712b281117SSeth Jennings 177244c7c734SChengming Zhou if (!trees) 17732b281117SSeth Jennings return; 17742b281117SSeth Jennings 177583e68f25SYosry Ahmed /* try_to_unuse() invalidated all the entries already */ 177683e68f25SYosry Ahmed for (i = 0; i < nr_zswap_trees[type]; i++) 177783e68f25SYosry Ahmed WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot)); 177844c7c734SChengming Zhou 177944c7c734SChengming Zhou kvfree(trees); 178044c7c734SChengming Zhou nr_zswap_trees[type] = 0; 1781aa9bca05SWeijie Yang zswap_trees[type] = NULL; 17822b281117SSeth Jennings } 17832b281117SSeth Jennings 17842b281117SSeth Jennings /********************************* 17852b281117SSeth Jennings * debugfs functions 17862b281117SSeth Jennings **********************************/ 17872b281117SSeth Jennings #ifdef CONFIG_DEBUG_FS 17882b281117SSeth Jennings #include <linux/debugfs.h> 17892b281117SSeth Jennings 17902b281117SSeth Jennings static struct dentry *zswap_debugfs_root; 17912b281117SSeth Jennings 1792141fdeecSLiu Shixin static int zswap_debugfs_init(void) 17932b281117SSeth Jennings { 17942b281117SSeth Jennings if (!debugfs_initialized()) 17952b281117SSeth Jennings return -ENODEV; 17962b281117SSeth Jennings 17972b281117SSeth Jennings zswap_debugfs_root = debugfs_create_dir("zswap", NULL); 
17982b281117SSeth Jennings 17990825a6f9SJoe Perches debugfs_create_u64("pool_limit_hit", 0444, 18002b281117SSeth Jennings zswap_debugfs_root, &zswap_pool_limit_hit); 18010825a6f9SJoe Perches debugfs_create_u64("reject_reclaim_fail", 0444, 18022b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_reclaim_fail); 18030825a6f9SJoe Perches debugfs_create_u64("reject_alloc_fail", 0444, 18042b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_alloc_fail); 18050825a6f9SJoe Perches debugfs_create_u64("reject_kmemcache_fail", 0444, 18062b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_kmemcache_fail); 1807cb61dad8SNhat Pham debugfs_create_u64("reject_compress_fail", 0444, 1808cb61dad8SNhat Pham zswap_debugfs_root, &zswap_reject_compress_fail); 18090825a6f9SJoe Perches debugfs_create_u64("reject_compress_poor", 0444, 18102b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_compress_poor); 18110825a6f9SJoe Perches debugfs_create_u64("written_back_pages", 0444, 18122b281117SSeth Jennings zswap_debugfs_root, &zswap_written_back_pages); 18130825a6f9SJoe Perches debugfs_create_u64("pool_total_size", 0444, 181412d79d64SDan Streetman zswap_debugfs_root, &zswap_pool_total_size); 18150825a6f9SJoe Perches debugfs_create_atomic_t("stored_pages", 0444, 18162b281117SSeth Jennings zswap_debugfs_root, &zswap_stored_pages); 1817a85f878bSSrividya Desireddy debugfs_create_atomic_t("same_filled_pages", 0444, 1818a85f878bSSrividya Desireddy zswap_debugfs_root, &zswap_same_filled_pages); 18192b281117SSeth Jennings 18202b281117SSeth Jennings return 0; 18212b281117SSeth Jennings } 18222b281117SSeth Jennings #else 1823141fdeecSLiu Shixin static int zswap_debugfs_init(void) 18242b281117SSeth Jennings { 18252b281117SSeth Jennings return 0; 18262b281117SSeth Jennings } 18272b281117SSeth Jennings #endif 18282b281117SSeth Jennings 18292b281117SSeth Jennings /********************************* 18302b281117SSeth Jennings * module init and exit 18312b281117SSeth Jennings 
**********************************/ 1832141fdeecSLiu Shixin static int zswap_setup(void) 18332b281117SSeth Jennings { 1834f1c54846SDan Streetman struct zswap_pool *pool; 1835ad7ed770SSebastian Andrzej Siewior int ret; 183660105e12SMinchan Kim 1837b7919122SLiu Shixin zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); 1838b7919122SLiu Shixin if (!zswap_entry_cache) { 18392b281117SSeth Jennings pr_err("entry cache creation failed\n"); 1840f1c54846SDan Streetman goto cache_fail; 18412b281117SSeth Jennings } 1842f1c54846SDan Streetman 1843cab7a7e5SSebastian Andrzej Siewior ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, 1844cab7a7e5SSebastian Andrzej Siewior "mm/zswap_pool:prepare", 1845cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_prepare, 1846cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_dead); 1847cab7a7e5SSebastian Andrzej Siewior if (ret) 1848cab7a7e5SSebastian Andrzej Siewior goto hp_fail; 1849cab7a7e5SSebastian Andrzej Siewior 1850f1c54846SDan Streetman pool = __zswap_pool_create_fallback(); 1851ae3d89a7SDan Streetman if (pool) { 1852f1c54846SDan Streetman pr_info("loaded using pool %s/%s\n", pool->tfm_name, 1853b8cf32dcSYosry Ahmed zpool_get_type(pool->zpools[0])); 1854f1c54846SDan Streetman list_add(&pool->list, &zswap_pools); 1855ae3d89a7SDan Streetman zswap_has_pool = true; 1856ae3d89a7SDan Streetman } else { 1857ae3d89a7SDan Streetman pr_err("pool creation failed\n"); 1858ae3d89a7SDan Streetman zswap_enabled = false; 1859ae3d89a7SDan Streetman } 186060105e12SMinchan Kim 18618409a385SRonald Monthero shrink_wq = alloc_workqueue("zswap-shrink", 18628409a385SRonald Monthero WQ_UNBOUND|WQ_MEM_RECLAIM, 1); 186345190f01SVitaly Wool if (!shrink_wq) 186445190f01SVitaly Wool goto fallback_fail; 186545190f01SVitaly Wool 18662b281117SSeth Jennings if (zswap_debugfs_init()) 18672b281117SSeth Jennings pr_warn("debugfs initialization failed\n"); 18689021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_SUCCEED; 18692b281117SSeth Jennings return 0; 
1870f1c54846SDan Streetman 187145190f01SVitaly Wool fallback_fail: 187238aeb071SDan Carpenter if (pool) 187345190f01SVitaly Wool zswap_pool_destroy(pool); 1874cab7a7e5SSebastian Andrzej Siewior hp_fail: 1875b7919122SLiu Shixin kmem_cache_destroy(zswap_entry_cache); 1876f1c54846SDan Streetman cache_fail: 1877d7b028f5SDan Streetman /* if built-in, we aren't unloaded on failure; don't allow use */ 18789021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_FAILED; 1879d7b028f5SDan Streetman zswap_enabled = false; 18802b281117SSeth Jennings return -ENOMEM; 18812b281117SSeth Jennings } 1882141fdeecSLiu Shixin 1883141fdeecSLiu Shixin static int __init zswap_init(void) 1884141fdeecSLiu Shixin { 1885141fdeecSLiu Shixin if (!zswap_enabled) 1886141fdeecSLiu Shixin return 0; 1887141fdeecSLiu Shixin return zswap_setup(); 1888141fdeecSLiu Shixin } 18892b281117SSeth Jennings /* must be late so crypto has time to come up */ 1890141fdeecSLiu Shixin late_initcall(zswap_init); 18912b281117SSeth Jennings 189268386da8SSeth Jennings MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>"); 18932b281117SSeth Jennings MODULE_DESCRIPTION("Compressed cache for swap pages"); 1894