1c942fddfSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 22b281117SSeth Jennings /* 32b281117SSeth Jennings * zswap.c - zswap driver file 42b281117SSeth Jennings * 542c06a0eSJohannes Weiner * zswap is a cache that takes pages that are in the process 62b281117SSeth Jennings * of being swapped out and attempts to compress and store them in a 72b281117SSeth Jennings * RAM-based memory pool. This can result in a significant I/O reduction on 82b281117SSeth Jennings * the swap device and, in the case where decompressing from RAM is faster 92b281117SSeth Jennings * than reading from the swap device, can also improve workload performance. 102b281117SSeth Jennings * 112b281117SSeth Jennings * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> 122b281117SSeth Jennings */ 132b281117SSeth Jennings 142b281117SSeth Jennings #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 152b281117SSeth Jennings 162b281117SSeth Jennings #include <linux/module.h> 172b281117SSeth Jennings #include <linux/cpu.h> 182b281117SSeth Jennings #include <linux/highmem.h> 192b281117SSeth Jennings #include <linux/slab.h> 202b281117SSeth Jennings #include <linux/spinlock.h> 212b281117SSeth Jennings #include <linux/types.h> 222b281117SSeth Jennings #include <linux/atomic.h> 232b281117SSeth Jennings #include <linux/rbtree.h> 242b281117SSeth Jennings #include <linux/swap.h> 252b281117SSeth Jennings #include <linux/crypto.h> 261ec3b5feSBarry Song #include <linux/scatterlist.h> 27ddc1a5cbSHugh Dickins #include <linux/mempolicy.h> 282b281117SSeth Jennings #include <linux/mempool.h> 2912d79d64SDan Streetman #include <linux/zpool.h> 301ec3b5feSBarry Song #include <crypto/acompress.h> 3142c06a0eSJohannes Weiner #include <linux/zswap.h> 322b281117SSeth Jennings #include <linux/mm_types.h> 332b281117SSeth Jennings #include <linux/page-flags.h> 342b281117SSeth Jennings #include <linux/swapops.h> 352b281117SSeth Jennings #include <linux/writeback.h> 362b281117SSeth Jennings #include <linux/pagemap.h> 
#include <linux/workqueue.h>
#include <linux/list_lru.h>

#include "swap.h"
#include "internal.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be a 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Store failed due to compression algorithm failure */
static u64 zswap_reject_compress_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

/* Sentinel for a compressor/zpool param that was never set successfully */
#define ZSWAP_PARAM_UNSET ""

static int zswap_setup(void);

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

/*
 * When enabled, a page is dropped from zswap after it has been loaded
 * (the swapcache copy becomes the only one).
 */
static bool zswap_exclusive_loads_enabled = IS_ENABLED(
		CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);

/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32

/* Enable/disable memory pressure-based shrinker. */
static bool zswap_shrinker_enabled = IS_ENABLED(
		CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);

/* Non-static accessor so other subsystems can query the enabled state */
bool is_zswap_enabled(void)
{
	return zswap_enabled;
}

/*********************************
* data structures
**********************************/

/*
 * Per-CPU compression context: the acomp transform, a pre-allocated
 * request, the completion wait object, and a scratch buffer.  The mutex
 * serializes use of this context on its CPU.
 */
struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *buffer;
	struct mutex mutex;
};

/*
 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
 * The only case where lru_lock is not acquired while holding tree.lock is
 * when a zswap_entry is taken off the lru for writeback, in that case it
 * needs to be verified that it's still valid in the tree.
 */
struct zswap_pool {
	struct zpool *zpools[ZSWAP_NR_ZPOOLS];
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
	struct list_lru list_lru;
	struct mem_cgroup *next_shrink;
	struct shrinker *shrinker;
	atomic_t nr_stored;
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * swpentry - associated swap entry, the offset indexes into the red-black tree
 * refcount - the number of outstanding reference to the entry. This is needed
 *            to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression. For a same value filled page length is 0, and both
 *          pool and lru are invalid and must be ignored.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - value of the same-value filled pages which have same content
 * objcg - the obj_cgroup that the compressed memory is charged to
 * lru - handle to the pool's lru used to evict pages.
 */
struct zswap_entry {
	struct rb_node rbnode;
	swp_entry_t swpentry;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;	/* compressed data */
		unsigned long value;	/* same-filled value, when length == 0 */
	};
	struct obj_cgroup *objcg;
	struct list_head lru;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

/* One array of trees per swap type; each type is split into multiple trees */
static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
static unsigned int nr_zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

enum zswap_init_type {
	ZSWAP_UNINIT,
	ZSWAP_INIT_SUCCEED,
	ZSWAP_INIT_FAILED
};

static enum zswap_init_type zswap_init_state;

/* used to ensure the integrity of initialization */
static DEFINE_MUTEX(zswap_init_lock);

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

/*
 * Map a swap entry to the zswap_tree covering its (type, offset) slot;
 * offsets are bucketed by SWAP_ADDRESS_SPACE_SHIFT.
 */
static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
{
	return &zswap_trees[swp_type(swp)][swp_offset(swp)
		>> SWAP_ADDRESS_SPACE_SHIFT];
}

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpools[0]))

static int zswap_writeback_entry(struct zswap_entry *entry,
				 swp_entry_t swpentry);

/* Has the compressed pool grown past zswap_max_pool_percent of RAM? */
static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

/*
 * After the pool limit was hit, only accept new pages once usage drops
 * below the acceptance threshold (a percentage of the max pool size).
 */
static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

/* Sum the backing sizes of all zpools belonging to one zswap pool */
static u64 get_zswap_pool_size(struct zswap_pool *pool)
{
	u64 pool_size = 0;
	int i;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		pool_size += zpool_get_total_size(pool->zpools[i]);

	return pool_size;
}

/* Recompute zswap_pool_total_size over all pools (RCU iteration) */
static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += get_zswap_pool_size(pool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}
/*********************************
* pool functions
**********************************/

static void zswap_alloc_shrinker(struct zswap_pool *pool);
static void shrink_worker(struct work_struct *w);

/*
 * Allocate and fully initialize a zswap_pool backed by @type zpools and
 * the @compressor acomp transform.  Returns the new pool with one kref
 * held (the "current pool" reference), or NULL on any failure.
 */
static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	int i;
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either are unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
		/* unique name for each pool specifically required by zsmalloc */
		snprintf(name, 38, "zswap%x",
			 atomic_inc_return(&zswap_pools_count));

		pool->zpools[i] = zpool_create_pool(type, name, gfp);
		if (!pool->zpools[i]) {
			pr_err("%s zpool not available\n", type);
			goto error;
		}
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	/* per-CPU compression contexts are set up via the CPU hotplug state */
	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;

	zswap_alloc_shrinker(pool);
	if (!pool->shrinker)
		goto error;

	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
		goto lru_fail;
	shrinker_register(pool->shrinker);
	INIT_WORK(&pool->shrink_work, shrink_worker);
	atomic_set(&pool->nr_stored, 0);

	zswap_pool_debug("created", pool);

	return pool;

lru_fail:
	list_lru_destroy(&pool->list_lru);
	shrinker_free(pool->shrinker);
error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	/* destroy only the zpools created before the failure (i was not reached) */
	while (i--)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
	return NULL;
}

/*
 * Create the initial pool, falling back to the compiled-in default
 * compressor and/or zpool when the configured ones are unavailable.
 * On total failure the offending params are reset to ZSWAP_PARAM_UNSET.
 */
static struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

/*
 * Tear down a pool that is off the zswap_pools list and has no
 * remaining references (see __zswap_pool_release).
 */
static void zswap_pool_destroy(struct zswap_pool *pool)
{
	int i;

	zswap_pool_debug("destroying", pool);

	shrinker_free(pool->shrinker);
	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	list_lru_destroy(&pool->list_lru);

	/* drop any memcg iterator held by the shrink worker */
	spin_lock(&zswap_pools_lock);
	mem_cgroup_iter_break(NULL, pool->next_shrink);
	pool->next_shrink = NULL;
	spin_unlock(&zswap_pools_lock);

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
}

/*
 * Deferred pool destruction: wait for RCU readers that may still see the
 * pool on the (already list_del_rcu'd) zswap_pools list, then destroy it.
 */
static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static struct zswap_pool *zswap_pool_current(void);

/*
 * kref release callback: unlink the pool from zswap_pools and schedule
 * the RCU-grace-period destruction.  Must not be the current pool.
 */
static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

/* Try to take a reference; returns 0 if @pool is NULL or already dying */
static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

/*
 * Return the current (first) pool without taking a reference; caller
 * must hold either rcu_read_lock() or zswap_pools_lock.
 */
static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

/* Get a referenced pointer to the current pool, or NULL if none/dying */
static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

/* Get a referenced pointer to the last (oldest) pool, or NULL */
static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}
/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	/* find a pool matching both the zpool type and compressor name */
	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		/* all zpools share the same type */
		if (strcmp(zpool_get_type(pool->zpools[0]), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

/*********************************
* param callbacks
**********************************/

/* Returns true if setting param @s would select a different pool config */
static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
{
	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return false;
	return true;
}

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
Weiner { 609abca07c0SJohannes Weiner struct zswap_pool *pool, *put_pool = NULL; 610abca07c0SJohannes Weiner char *s = strstrip((char *)val); 611abca07c0SJohannes Weiner int ret = 0; 612abca07c0SJohannes Weiner bool new_pool = false; 613abca07c0SJohannes Weiner 614abca07c0SJohannes Weiner mutex_lock(&zswap_init_lock); 615abca07c0SJohannes Weiner switch (zswap_init_state) { 616abca07c0SJohannes Weiner case ZSWAP_UNINIT: 617abca07c0SJohannes Weiner /* if this is load-time (pre-init) param setting, 618abca07c0SJohannes Weiner * don't create a pool; that's done during init. 619abca07c0SJohannes Weiner */ 620abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 621abca07c0SJohannes Weiner break; 622abca07c0SJohannes Weiner case ZSWAP_INIT_SUCCEED: 623abca07c0SJohannes Weiner new_pool = zswap_pool_changed(s, kp); 624abca07c0SJohannes Weiner break; 625abca07c0SJohannes Weiner case ZSWAP_INIT_FAILED: 626abca07c0SJohannes Weiner pr_err("can't set param, initialization failed\n"); 627abca07c0SJohannes Weiner ret = -ENODEV; 628abca07c0SJohannes Weiner } 629abca07c0SJohannes Weiner mutex_unlock(&zswap_init_lock); 630abca07c0SJohannes Weiner 631abca07c0SJohannes Weiner /* no need to create a new pool, return directly */ 632abca07c0SJohannes Weiner if (!new_pool) 633abca07c0SJohannes Weiner return ret; 634abca07c0SJohannes Weiner 635abca07c0SJohannes Weiner if (!type) { 636abca07c0SJohannes Weiner if (!zpool_has_pool(s)) { 637abca07c0SJohannes Weiner pr_err("zpool %s not available\n", s); 638abca07c0SJohannes Weiner return -ENOENT; 639abca07c0SJohannes Weiner } 640abca07c0SJohannes Weiner type = s; 641abca07c0SJohannes Weiner } else if (!compressor) { 642abca07c0SJohannes Weiner if (!crypto_has_acomp(s, 0, 0)) { 643abca07c0SJohannes Weiner pr_err("compressor %s not available\n", s); 644abca07c0SJohannes Weiner return -ENOENT; 645abca07c0SJohannes Weiner } 646abca07c0SJohannes Weiner compressor = s; 647abca07c0SJohannes Weiner } else { 648abca07c0SJohannes Weiner WARN_ON(1); 
649abca07c0SJohannes Weiner return -EINVAL; 650abca07c0SJohannes Weiner } 651abca07c0SJohannes Weiner 652abca07c0SJohannes Weiner spin_lock(&zswap_pools_lock); 653abca07c0SJohannes Weiner 654abca07c0SJohannes Weiner pool = zswap_pool_find_get(type, compressor); 655abca07c0SJohannes Weiner if (pool) { 656abca07c0SJohannes Weiner zswap_pool_debug("using existing", pool); 657abca07c0SJohannes Weiner WARN_ON(pool == zswap_pool_current()); 658abca07c0SJohannes Weiner list_del_rcu(&pool->list); 659abca07c0SJohannes Weiner } 660abca07c0SJohannes Weiner 661abca07c0SJohannes Weiner spin_unlock(&zswap_pools_lock); 662abca07c0SJohannes Weiner 663abca07c0SJohannes Weiner if (!pool) 664abca07c0SJohannes Weiner pool = zswap_pool_create(type, compressor); 665abca07c0SJohannes Weiner 666abca07c0SJohannes Weiner if (pool) 667abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 668abca07c0SJohannes Weiner else 669abca07c0SJohannes Weiner ret = -EINVAL; 670abca07c0SJohannes Weiner 671abca07c0SJohannes Weiner spin_lock(&zswap_pools_lock); 672abca07c0SJohannes Weiner 673abca07c0SJohannes Weiner if (!ret) { 674abca07c0SJohannes Weiner put_pool = zswap_pool_current(); 675abca07c0SJohannes Weiner list_add_rcu(&pool->list, &zswap_pools); 676abca07c0SJohannes Weiner zswap_has_pool = true; 677abca07c0SJohannes Weiner } else if (pool) { 678abca07c0SJohannes Weiner /* add the possibly pre-existing pool to the end of the pools 679abca07c0SJohannes Weiner * list; if it's new (and empty) then it'll be removed and 680abca07c0SJohannes Weiner * destroyed by the put after we drop the lock 681abca07c0SJohannes Weiner */ 682abca07c0SJohannes Weiner list_add_tail_rcu(&pool->list, &zswap_pools); 683abca07c0SJohannes Weiner put_pool = pool; 684abca07c0SJohannes Weiner } 685abca07c0SJohannes Weiner 686abca07c0SJohannes Weiner spin_unlock(&zswap_pools_lock); 687abca07c0SJohannes Weiner 688abca07c0SJohannes Weiner if (!zswap_has_pool && !pool) { 689abca07c0SJohannes Weiner /* if initial pool creation 
failed, and this pool creation also 690abca07c0SJohannes Weiner * failed, maybe both compressor and zpool params were bad. 691abca07c0SJohannes Weiner * Allow changing this param, so pool creation will succeed 692abca07c0SJohannes Weiner * when the other param is changed. We already verified this 693abca07c0SJohannes Weiner * param is ok in the zpool_has_pool() or crypto_has_acomp() 694abca07c0SJohannes Weiner * checks above. 695abca07c0SJohannes Weiner */ 696abca07c0SJohannes Weiner ret = param_set_charp(s, kp); 697abca07c0SJohannes Weiner } 698abca07c0SJohannes Weiner 699abca07c0SJohannes Weiner /* drop the ref from either the old current pool, 700abca07c0SJohannes Weiner * or the new pool we failed to add 701abca07c0SJohannes Weiner */ 702abca07c0SJohannes Weiner if (put_pool) 703abca07c0SJohannes Weiner zswap_pool_put(put_pool); 704abca07c0SJohannes Weiner 705abca07c0SJohannes Weiner return ret; 706abca07c0SJohannes Weiner } 707abca07c0SJohannes Weiner 708abca07c0SJohannes Weiner static int zswap_compressor_param_set(const char *val, 709abca07c0SJohannes Weiner const struct kernel_param *kp) 710abca07c0SJohannes Weiner { 711abca07c0SJohannes Weiner return __zswap_param_set(val, kp, zswap_zpool_type, NULL); 712abca07c0SJohannes Weiner } 713abca07c0SJohannes Weiner 714abca07c0SJohannes Weiner static int zswap_zpool_param_set(const char *val, 715abca07c0SJohannes Weiner const struct kernel_param *kp) 716abca07c0SJohannes Weiner { 717abca07c0SJohannes Weiner return __zswap_param_set(val, kp, NULL, zswap_compressor); 718abca07c0SJohannes Weiner } 719abca07c0SJohannes Weiner 720abca07c0SJohannes Weiner static int zswap_enabled_param_set(const char *val, 721abca07c0SJohannes Weiner const struct kernel_param *kp) 722abca07c0SJohannes Weiner { 723abca07c0SJohannes Weiner int ret = -ENODEV; 724abca07c0SJohannes Weiner 725abca07c0SJohannes Weiner /* if this is load-time (pre-init) param setting, only set param. 
*/ 726abca07c0SJohannes Weiner if (system_state != SYSTEM_RUNNING) 727abca07c0SJohannes Weiner return param_set_bool(val, kp); 728abca07c0SJohannes Weiner 729abca07c0SJohannes Weiner mutex_lock(&zswap_init_lock); 730abca07c0SJohannes Weiner switch (zswap_init_state) { 731abca07c0SJohannes Weiner case ZSWAP_UNINIT: 732abca07c0SJohannes Weiner if (zswap_setup()) 733abca07c0SJohannes Weiner break; 734abca07c0SJohannes Weiner fallthrough; 735abca07c0SJohannes Weiner case ZSWAP_INIT_SUCCEED: 736abca07c0SJohannes Weiner if (!zswap_has_pool) 737abca07c0SJohannes Weiner pr_err("can't enable, no pool configured\n"); 738abca07c0SJohannes Weiner else 739abca07c0SJohannes Weiner ret = param_set_bool(val, kp); 740abca07c0SJohannes Weiner break; 741abca07c0SJohannes Weiner case ZSWAP_INIT_FAILED: 742abca07c0SJohannes Weiner pr_err("can't enable, initialization failed\n"); 743abca07c0SJohannes Weiner } 744abca07c0SJohannes Weiner mutex_unlock(&zswap_init_lock); 745abca07c0SJohannes Weiner 746abca07c0SJohannes Weiner return ret; 747abca07c0SJohannes Weiner } 748abca07c0SJohannes Weiner 749*506a86c5SJohannes Weiner /********************************* 750*506a86c5SJohannes Weiner * lru functions 751*506a86c5SJohannes Weiner **********************************/ 752*506a86c5SJohannes Weiner 753a65b0e76SDomenico Cerasuolo /* should be called under RCU */ 754a65b0e76SDomenico Cerasuolo #ifdef CONFIG_MEMCG 755a65b0e76SDomenico Cerasuolo static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) 756a65b0e76SDomenico Cerasuolo { 757a65b0e76SDomenico Cerasuolo return entry->objcg ? 
obj_cgroup_memcg(entry->objcg) : NULL; 758a65b0e76SDomenico Cerasuolo } 759a65b0e76SDomenico Cerasuolo #else 760a65b0e76SDomenico Cerasuolo static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) 761a65b0e76SDomenico Cerasuolo { 762a65b0e76SDomenico Cerasuolo return NULL; 763a65b0e76SDomenico Cerasuolo } 764a65b0e76SDomenico Cerasuolo #endif 765a65b0e76SDomenico Cerasuolo 766a65b0e76SDomenico Cerasuolo static inline int entry_to_nid(struct zswap_entry *entry) 767a65b0e76SDomenico Cerasuolo { 768a65b0e76SDomenico Cerasuolo return page_to_nid(virt_to_page(entry)); 769a65b0e76SDomenico Cerasuolo } 770a65b0e76SDomenico Cerasuolo 771*506a86c5SJohannes Weiner void zswap_lruvec_state_init(struct lruvec *lruvec) 772*506a86c5SJohannes Weiner { 773*506a86c5SJohannes Weiner atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0); 774*506a86c5SJohannes Weiner } 775*506a86c5SJohannes Weiner 776*506a86c5SJohannes Weiner void zswap_folio_swapin(struct folio *folio) 777*506a86c5SJohannes Weiner { 778*506a86c5SJohannes Weiner struct lruvec *lruvec; 779*506a86c5SJohannes Weiner 780*506a86c5SJohannes Weiner if (folio) { 781*506a86c5SJohannes Weiner lruvec = folio_lruvec(folio); 782*506a86c5SJohannes Weiner atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); 783*506a86c5SJohannes Weiner } 784*506a86c5SJohannes Weiner } 785*506a86c5SJohannes Weiner 786a65b0e76SDomenico Cerasuolo void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) 787a65b0e76SDomenico Cerasuolo { 788a65b0e76SDomenico Cerasuolo struct zswap_pool *pool; 789a65b0e76SDomenico Cerasuolo 790a65b0e76SDomenico Cerasuolo /* lock out zswap pools list modification */ 791a65b0e76SDomenico Cerasuolo spin_lock(&zswap_pools_lock); 792a65b0e76SDomenico Cerasuolo list_for_each_entry(pool, &zswap_pools, list) { 793a65b0e76SDomenico Cerasuolo if (pool->next_shrink == memcg) 794a65b0e76SDomenico Cerasuolo pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL); 
795a65b0e76SDomenico Cerasuolo } 796a65b0e76SDomenico Cerasuolo spin_unlock(&zswap_pools_lock); 797a65b0e76SDomenico Cerasuolo } 798a65b0e76SDomenico Cerasuolo 7992b281117SSeth Jennings /********************************* 8002b281117SSeth Jennings * zswap entry functions 8012b281117SSeth Jennings **********************************/ 8022b281117SSeth Jennings static struct kmem_cache *zswap_entry_cache; 8032b281117SSeth Jennings 804a65b0e76SDomenico Cerasuolo static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid) 8052b281117SSeth Jennings { 8062b281117SSeth Jennings struct zswap_entry *entry; 807a65b0e76SDomenico Cerasuolo entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid); 8082b281117SSeth Jennings if (!entry) 8092b281117SSeth Jennings return NULL; 8102b281117SSeth Jennings entry->refcount = 1; 8110ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 8122b281117SSeth Jennings return entry; 8132b281117SSeth Jennings } 8142b281117SSeth Jennings 8152b281117SSeth Jennings static void zswap_entry_cache_free(struct zswap_entry *entry) 8162b281117SSeth Jennings { 8172b281117SSeth Jennings kmem_cache_free(zswap_entry_cache, entry); 8182b281117SSeth Jennings } 8192b281117SSeth Jennings 8202b281117SSeth Jennings /********************************* 821a65b0e76SDomenico Cerasuolo * lru functions 822a65b0e76SDomenico Cerasuolo **********************************/ 823a65b0e76SDomenico Cerasuolo static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry) 824a65b0e76SDomenico Cerasuolo { 825b5ba474fSNhat Pham atomic_long_t *nr_zswap_protected; 826b5ba474fSNhat Pham unsigned long lru_size, old, new; 827a65b0e76SDomenico Cerasuolo int nid = entry_to_nid(entry); 828a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 829b5ba474fSNhat Pham struct lruvec *lruvec; 830a65b0e76SDomenico Cerasuolo 831a65b0e76SDomenico Cerasuolo /* 832a65b0e76SDomenico Cerasuolo * Note that it is safe to use rcu_read_lock() here, even in the face of 
833a65b0e76SDomenico Cerasuolo * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection 834a65b0e76SDomenico Cerasuolo * used in list_lru lookup, only two scenarios are possible: 835a65b0e76SDomenico Cerasuolo * 836a65b0e76SDomenico Cerasuolo * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The 837a65b0e76SDomenico Cerasuolo * new entry will be reparented to memcg's parent's list_lru. 838a65b0e76SDomenico Cerasuolo * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The 839a65b0e76SDomenico Cerasuolo * new entry will be added directly to memcg's parent's list_lru. 840a65b0e76SDomenico Cerasuolo * 8413f798aa6SChengming Zhou * Similar reasoning holds for list_lru_del(). 842a65b0e76SDomenico Cerasuolo */ 843a65b0e76SDomenico Cerasuolo rcu_read_lock(); 844a65b0e76SDomenico Cerasuolo memcg = mem_cgroup_from_entry(entry); 845a65b0e76SDomenico Cerasuolo /* will always succeed */ 846a65b0e76SDomenico Cerasuolo list_lru_add(list_lru, &entry->lru, nid, memcg); 847b5ba474fSNhat Pham 848b5ba474fSNhat Pham /* Update the protection area */ 849b5ba474fSNhat Pham lru_size = list_lru_count_one(list_lru, nid, memcg); 850b5ba474fSNhat Pham lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); 851b5ba474fSNhat Pham nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected; 852b5ba474fSNhat Pham old = atomic_long_inc_return(nr_zswap_protected); 853b5ba474fSNhat Pham /* 854b5ba474fSNhat Pham * Decay to avoid overflow and adapt to changing workloads. 855b5ba474fSNhat Pham * This is based on LRU reclaim cost decaying heuristics. 856b5ba474fSNhat Pham */ 857b5ba474fSNhat Pham do { 858b5ba474fSNhat Pham new = old > lru_size / 4 ? 
old / 2 : old; 859b5ba474fSNhat Pham } while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new)); 860a65b0e76SDomenico Cerasuolo rcu_read_unlock(); 861a65b0e76SDomenico Cerasuolo } 862a65b0e76SDomenico Cerasuolo 863a65b0e76SDomenico Cerasuolo static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry) 864a65b0e76SDomenico Cerasuolo { 865a65b0e76SDomenico Cerasuolo int nid = entry_to_nid(entry); 866a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 867a65b0e76SDomenico Cerasuolo 868a65b0e76SDomenico Cerasuolo rcu_read_lock(); 869a65b0e76SDomenico Cerasuolo memcg = mem_cgroup_from_entry(entry); 870a65b0e76SDomenico Cerasuolo /* will always succeed */ 871a65b0e76SDomenico Cerasuolo list_lru_del(list_lru, &entry->lru, nid, memcg); 872a65b0e76SDomenico Cerasuolo rcu_read_unlock(); 873a65b0e76SDomenico Cerasuolo } 874a65b0e76SDomenico Cerasuolo 875a65b0e76SDomenico Cerasuolo /********************************* 8762b281117SSeth Jennings * rbtree functions 8772b281117SSeth Jennings **********************************/ 8782b281117SSeth Jennings static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset) 8792b281117SSeth Jennings { 8802b281117SSeth Jennings struct rb_node *node = root->rb_node; 8812b281117SSeth Jennings struct zswap_entry *entry; 8820bb48849SDomenico Cerasuolo pgoff_t entry_offset; 8832b281117SSeth Jennings 8842b281117SSeth Jennings while (node) { 8852b281117SSeth Jennings entry = rb_entry(node, struct zswap_entry, rbnode); 8860bb48849SDomenico Cerasuolo entry_offset = swp_offset(entry->swpentry); 8870bb48849SDomenico Cerasuolo if (entry_offset > offset) 8882b281117SSeth Jennings node = node->rb_left; 8890bb48849SDomenico Cerasuolo else if (entry_offset < offset) 8902b281117SSeth Jennings node = node->rb_right; 8912b281117SSeth Jennings else 8922b281117SSeth Jennings return entry; 8932b281117SSeth Jennings } 8942b281117SSeth Jennings return NULL; 8952b281117SSeth Jennings } 8962b281117SSeth Jennings 
8972b281117SSeth Jennings /* 8982b281117SSeth Jennings * In the case that a entry with the same offset is found, a pointer to 8992b281117SSeth Jennings * the existing entry is stored in dupentry and the function returns -EEXIST 9002b281117SSeth Jennings */ 9012b281117SSeth Jennings static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry, 9022b281117SSeth Jennings struct zswap_entry **dupentry) 9032b281117SSeth Jennings { 9042b281117SSeth Jennings struct rb_node **link = &root->rb_node, *parent = NULL; 9052b281117SSeth Jennings struct zswap_entry *myentry; 9060bb48849SDomenico Cerasuolo pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry); 9072b281117SSeth Jennings 9082b281117SSeth Jennings while (*link) { 9092b281117SSeth Jennings parent = *link; 9102b281117SSeth Jennings myentry = rb_entry(parent, struct zswap_entry, rbnode); 9110bb48849SDomenico Cerasuolo myentry_offset = swp_offset(myentry->swpentry); 9120bb48849SDomenico Cerasuolo if (myentry_offset > entry_offset) 9132b281117SSeth Jennings link = &(*link)->rb_left; 9140bb48849SDomenico Cerasuolo else if (myentry_offset < entry_offset) 9152b281117SSeth Jennings link = &(*link)->rb_right; 9162b281117SSeth Jennings else { 9172b281117SSeth Jennings *dupentry = myentry; 9182b281117SSeth Jennings return -EEXIST; 9192b281117SSeth Jennings } 9202b281117SSeth Jennings } 9212b281117SSeth Jennings rb_link_node(&entry->rbnode, parent, link); 9222b281117SSeth Jennings rb_insert_color(&entry->rbnode, root); 9232b281117SSeth Jennings return 0; 9242b281117SSeth Jennings } 9252b281117SSeth Jennings 92618a93707SYosry Ahmed static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) 9270ab0abcfSWeijie Yang { 9280ab0abcfSWeijie Yang if (!RB_EMPTY_NODE(&entry->rbnode)) { 9290ab0abcfSWeijie Yang rb_erase(&entry->rbnode, root); 9300ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 93118a93707SYosry Ahmed return true; 9320ab0abcfSWeijie Yang } 93318a93707SYosry Ahmed return false; 
9340ab0abcfSWeijie Yang } 9350ab0abcfSWeijie Yang 936b8cf32dcSYosry Ahmed static struct zpool *zswap_find_zpool(struct zswap_entry *entry) 937b8cf32dcSYosry Ahmed { 938b8cf32dcSYosry Ahmed int i = 0; 939b8cf32dcSYosry Ahmed 940b8cf32dcSYosry Ahmed if (ZSWAP_NR_ZPOOLS > 1) 941b8cf32dcSYosry Ahmed i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS)); 942b8cf32dcSYosry Ahmed 943b8cf32dcSYosry Ahmed return entry->pool->zpools[i]; 944b8cf32dcSYosry Ahmed } 945b8cf32dcSYosry Ahmed 9460ab0abcfSWeijie Yang /* 94712d79d64SDan Streetman * Carries out the common pattern of freeing and entry's zpool allocation, 9480ab0abcfSWeijie Yang * freeing the entry itself, and decrementing the number of stored pages. 9490ab0abcfSWeijie Yang */ 95042398be2SJohannes Weiner static void zswap_entry_free(struct zswap_entry *entry) 9510ab0abcfSWeijie Yang { 952a85f878bSSrividya Desireddy if (!entry->length) 953a85f878bSSrividya Desireddy atomic_dec(&zswap_same_filled_pages); 954a85f878bSSrividya Desireddy else { 955a65b0e76SDomenico Cerasuolo zswap_lru_del(&entry->pool->list_lru, entry); 956b8cf32dcSYosry Ahmed zpool_free(zswap_find_zpool(entry), entry->handle); 957b5ba474fSNhat Pham atomic_dec(&entry->pool->nr_stored); 958f1c54846SDan Streetman zswap_pool_put(entry->pool); 959a85f878bSSrividya Desireddy } 9602e601e1eSJohannes Weiner if (entry->objcg) { 9612e601e1eSJohannes Weiner obj_cgroup_uncharge_zswap(entry->objcg, entry->length); 9622e601e1eSJohannes Weiner obj_cgroup_put(entry->objcg); 9632e601e1eSJohannes Weiner } 9640ab0abcfSWeijie Yang zswap_entry_cache_free(entry); 9650ab0abcfSWeijie Yang atomic_dec(&zswap_stored_pages); 966f1c54846SDan Streetman zswap_update_total_size(); 9670ab0abcfSWeijie Yang } 9680ab0abcfSWeijie Yang 9690ab0abcfSWeijie Yang /* caller must hold the tree lock */ 9700ab0abcfSWeijie Yang static void zswap_entry_get(struct zswap_entry *entry) 9710ab0abcfSWeijie Yang { 972e477559cSJohannes Weiner WARN_ON_ONCE(!entry->refcount); 9730ab0abcfSWeijie Yang entry->refcount++; 
9740ab0abcfSWeijie Yang } 9750ab0abcfSWeijie Yang 976dab7711fSJohannes Weiner /* caller must hold the tree lock */ 977db128f5fSYosry Ahmed static void zswap_entry_put(struct zswap_entry *entry) 9780ab0abcfSWeijie Yang { 979dab7711fSJohannes Weiner WARN_ON_ONCE(!entry->refcount); 980dab7711fSJohannes Weiner if (--entry->refcount == 0) { 98173108957SJohannes Weiner WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode)); 98242398be2SJohannes Weiner zswap_entry_free(entry); 9830ab0abcfSWeijie Yang } 9840ab0abcfSWeijie Yang } 9850ab0abcfSWeijie Yang 9867dd1f7f0SJohannes Weiner /* 9877dd1f7f0SJohannes Weiner * If the entry is still valid in the tree, drop the initial ref and remove it 9887dd1f7f0SJohannes Weiner * from the tree. This function must be called with an additional ref held, 9897dd1f7f0SJohannes Weiner * otherwise it may race with another invalidation freeing the entry. 9907dd1f7f0SJohannes Weiner */ 9917dd1f7f0SJohannes Weiner static void zswap_invalidate_entry(struct zswap_tree *tree, 9927dd1f7f0SJohannes Weiner struct zswap_entry *entry) 9937dd1f7f0SJohannes Weiner { 9947dd1f7f0SJohannes Weiner if (zswap_rb_erase(&tree->rbroot, entry)) 9957dd1f7f0SJohannes Weiner zswap_entry_put(entry); 9967dd1f7f0SJohannes Weiner } 9977dd1f7f0SJohannes Weiner 9982b281117SSeth Jennings /********************************* 999b5ba474fSNhat Pham * shrinker functions 1000b5ba474fSNhat Pham **********************************/ 1001b5ba474fSNhat Pham static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l, 1002b5ba474fSNhat Pham spinlock_t *lock, void *arg); 1003b5ba474fSNhat Pham 1004b5ba474fSNhat Pham static unsigned long zswap_shrinker_scan(struct shrinker *shrinker, 1005b5ba474fSNhat Pham struct shrink_control *sc) 1006b5ba474fSNhat Pham { 1007b5ba474fSNhat Pham struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid)); 1008b5ba474fSNhat Pham unsigned long shrink_ret, nr_protected, lru_size; 1009b5ba474fSNhat Pham struct zswap_pool *pool = 
shrinker->private_data; 1010b5ba474fSNhat Pham bool encountered_page_in_swapcache = false; 1011b5ba474fSNhat Pham 1012501a06feSNhat Pham if (!zswap_shrinker_enabled || 1013501a06feSNhat Pham !mem_cgroup_zswap_writeback_enabled(sc->memcg)) { 1014b5ba474fSNhat Pham sc->nr_scanned = 0; 1015b5ba474fSNhat Pham return SHRINK_STOP; 1016b5ba474fSNhat Pham } 1017b5ba474fSNhat Pham 1018b5ba474fSNhat Pham nr_protected = 1019b5ba474fSNhat Pham atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected); 1020b5ba474fSNhat Pham lru_size = list_lru_shrink_count(&pool->list_lru, sc); 1021b5ba474fSNhat Pham 1022b5ba474fSNhat Pham /* 1023b5ba474fSNhat Pham * Abort if we are shrinking into the protected region. 1024b5ba474fSNhat Pham * 1025b5ba474fSNhat Pham * This short-circuiting is necessary because if we have too many multiple 1026b5ba474fSNhat Pham * concurrent reclaimers getting the freeable zswap object counts at the 1027b5ba474fSNhat Pham * same time (before any of them made reasonable progress), the total 1028b5ba474fSNhat Pham * number of reclaimed objects might be more than the number of unprotected 1029b5ba474fSNhat Pham * objects (i.e the reclaimers will reclaim into the protected area of the 1030b5ba474fSNhat Pham * zswap LRU). 1031b5ba474fSNhat Pham */ 1032b5ba474fSNhat Pham if (nr_protected >= lru_size - sc->nr_to_scan) { 1033b5ba474fSNhat Pham sc->nr_scanned = 0; 1034b5ba474fSNhat Pham return SHRINK_STOP; 1035b5ba474fSNhat Pham } 1036b5ba474fSNhat Pham 1037b5ba474fSNhat Pham shrink_ret = list_lru_shrink_walk(&pool->list_lru, sc, &shrink_memcg_cb, 1038b5ba474fSNhat Pham &encountered_page_in_swapcache); 1039b5ba474fSNhat Pham 1040b5ba474fSNhat Pham if (encountered_page_in_swapcache) 1041b5ba474fSNhat Pham return SHRINK_STOP; 1042b5ba474fSNhat Pham 1043b5ba474fSNhat Pham return shrink_ret ? 
shrink_ret : SHRINK_STOP; 1044b5ba474fSNhat Pham } 1045b5ba474fSNhat Pham 1046b5ba474fSNhat Pham static unsigned long zswap_shrinker_count(struct shrinker *shrinker, 1047b5ba474fSNhat Pham struct shrink_control *sc) 1048b5ba474fSNhat Pham { 1049b5ba474fSNhat Pham struct zswap_pool *pool = shrinker->private_data; 1050b5ba474fSNhat Pham struct mem_cgroup *memcg = sc->memcg; 1051b5ba474fSNhat Pham struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid)); 1052b5ba474fSNhat Pham unsigned long nr_backing, nr_stored, nr_freeable, nr_protected; 1053b5ba474fSNhat Pham 1054501a06feSNhat Pham if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) 1055b5ba474fSNhat Pham return 0; 1056b5ba474fSNhat Pham 1057b5ba474fSNhat Pham #ifdef CONFIG_MEMCG_KMEM 10587d7ef0a4SYosry Ahmed mem_cgroup_flush_stats(memcg); 1059b5ba474fSNhat Pham nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; 1060b5ba474fSNhat Pham nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); 1061b5ba474fSNhat Pham #else 1062b5ba474fSNhat Pham /* use pool stats instead of memcg stats */ 1063b5ba474fSNhat Pham nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT; 1064b5ba474fSNhat Pham nr_stored = atomic_read(&pool->nr_stored); 1065b5ba474fSNhat Pham #endif 1066b5ba474fSNhat Pham 1067b5ba474fSNhat Pham if (!nr_stored) 1068b5ba474fSNhat Pham return 0; 1069b5ba474fSNhat Pham 1070b5ba474fSNhat Pham nr_protected = 1071b5ba474fSNhat Pham atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected); 1072b5ba474fSNhat Pham nr_freeable = list_lru_shrink_count(&pool->list_lru, sc); 1073b5ba474fSNhat Pham /* 1074b5ba474fSNhat Pham * Subtract the lru size by an estimate of the number of pages 1075b5ba474fSNhat Pham * that should be protected. 1076b5ba474fSNhat Pham */ 1077b5ba474fSNhat Pham nr_freeable = nr_freeable > nr_protected ? 
nr_freeable - nr_protected : 0; 1078b5ba474fSNhat Pham 1079b5ba474fSNhat Pham /* 1080b5ba474fSNhat Pham * Scale the number of freeable pages by the memory saving factor. 1081b5ba474fSNhat Pham * This ensures that the better zswap compresses memory, the fewer 1082b5ba474fSNhat Pham * pages we will evict to swap (as it will otherwise incur IO for 1083b5ba474fSNhat Pham * relatively small memory saving). 1084b5ba474fSNhat Pham */ 1085b5ba474fSNhat Pham return mult_frac(nr_freeable, nr_backing, nr_stored); 1086b5ba474fSNhat Pham } 1087b5ba474fSNhat Pham 1088b5ba474fSNhat Pham static void zswap_alloc_shrinker(struct zswap_pool *pool) 1089b5ba474fSNhat Pham { 1090b5ba474fSNhat Pham pool->shrinker = 1091b5ba474fSNhat Pham shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap"); 1092b5ba474fSNhat Pham if (!pool->shrinker) 1093b5ba474fSNhat Pham return; 1094b5ba474fSNhat Pham 1095b5ba474fSNhat Pham pool->shrinker->private_data = pool; 1096b5ba474fSNhat Pham pool->shrinker->scan_objects = zswap_shrinker_scan; 1097b5ba474fSNhat Pham pool->shrinker->count_objects = zswap_shrinker_count; 1098b5ba474fSNhat Pham pool->shrinker->batch = 0; 1099b5ba474fSNhat Pham pool->shrinker->seeks = DEFAULT_SEEKS; 1100b5ba474fSNhat Pham } 1101b5ba474fSNhat Pham 1102b5ba474fSNhat Pham /********************************* 11032b281117SSeth Jennings * per-cpu code 11042b281117SSeth Jennings **********************************/ 1105cab7a7e5SSebastian Andrzej Siewior static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) 1106f1c54846SDan Streetman { 1107cab7a7e5SSebastian Andrzej Siewior struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); 11081ec3b5feSBarry Song struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); 11091ec3b5feSBarry Song struct crypto_acomp *acomp; 11101ec3b5feSBarry Song struct acomp_req *req; 11118ba2f844SChengming Zhou int ret; 11128ba2f844SChengming Zhou 11138ba2f844SChengming Zhou 
mutex_init(&acomp_ctx->mutex); 11148ba2f844SChengming Zhou 11158ba2f844SChengming Zhou acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); 11168ba2f844SChengming Zhou if (!acomp_ctx->buffer) 11178ba2f844SChengming Zhou return -ENOMEM; 1118f1c54846SDan Streetman 11191ec3b5feSBarry Song acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); 11201ec3b5feSBarry Song if (IS_ERR(acomp)) { 11211ec3b5feSBarry Song pr_err("could not alloc crypto acomp %s : %ld\n", 11221ec3b5feSBarry Song pool->tfm_name, PTR_ERR(acomp)); 11238ba2f844SChengming Zhou ret = PTR_ERR(acomp); 11248ba2f844SChengming Zhou goto acomp_fail; 11251ec3b5feSBarry Song } 11261ec3b5feSBarry Song acomp_ctx->acomp = acomp; 1127cab7a7e5SSebastian Andrzej Siewior 11281ec3b5feSBarry Song req = acomp_request_alloc(acomp_ctx->acomp); 11291ec3b5feSBarry Song if (!req) { 11301ec3b5feSBarry Song pr_err("could not alloc crypto acomp_request %s\n", 11311ec3b5feSBarry Song pool->tfm_name); 11328ba2f844SChengming Zhou ret = -ENOMEM; 11338ba2f844SChengming Zhou goto req_fail; 1134f1c54846SDan Streetman } 11351ec3b5feSBarry Song acomp_ctx->req = req; 11361ec3b5feSBarry Song 11371ec3b5feSBarry Song crypto_init_wait(&acomp_ctx->wait); 11381ec3b5feSBarry Song /* 11391ec3b5feSBarry Song * if the backend of acomp is async zip, crypto_req_done() will wakeup 11401ec3b5feSBarry Song * crypto_wait_req(); if the backend of acomp is scomp, the callback 11411ec3b5feSBarry Song * won't be called, crypto_wait_req() will return without blocking. 
 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	return 0;

req_fail:
	crypto_free_acomp(acomp_ctx->acomp);
acomp_fail:
	kfree(acomp_ctx->buffer);
	return ret;
}

/*
 * CPU hotplug teardown callback: release the per-CPU compression context
 * (request, transform, bounce buffer) that zswap_cpu_comp_prepare() set up.
 */
static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	/* Fields may hold ERR_PTR/NULL if prepare failed partway; check each. */
	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
		kfree(acomp_ctx->buffer);
	}

	return 0;
}

/*
 * list_lru walk callback: try to write one zswap entry back to the swap
 * device.  Called with the list_lru spinlock held; the lock is dropped
 * around the actual writeback and reacquired before returning, which is
 * why only LRU_REMOVED_RETRY / LRU_RETRY may be returned.
 */
static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
				       spinlock_t *lock, void *arg)
{
	struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
	bool *encountered_page_in_swapcache = (bool *)arg;
	swp_entry_t swpentry;
	enum lru_status ret = LRU_REMOVED_RETRY;
	int writeback_result;

	/*
	 * Rotate the entry to the tail before unlocking the LRU,
	 * so that in case of an invalidation race concurrent
	 * reclaimers don't waste their time on it.
	 *
	 * If writeback succeeds, or failure is due to the entry
	 * being invalidated by the swap subsystem, the invalidation
	 * will unlink and free it.
	 *
	 * Temporary failures, where the same entry should be tried
	 * again immediately, almost never happen for this shrinker.
	 * We don't do any trylocking; -ENOMEM comes closest,
	 * but that's extremely rare and doesn't happen spuriously
	 * either. Don't bother distinguishing this case.
	 *
	 * But since they do exist in theory, the entry cannot just
	 * be unlinked, or we could leak it. Hence, rotate.
	 */
	list_move_tail(item, &l->list);

	/*
	 * Once the lru lock is dropped, the entry might get freed. The
	 * swpentry is copied to the stack, and entry isn't deref'd again
	 * until the entry is verified to still be alive in the tree.
	 */
	swpentry = entry->swpentry;

	/*
	 * It's safe to drop the lock here because we return either
	 * LRU_REMOVED_RETRY or LRU_RETRY.
	 */
	spin_unlock(lock);

	writeback_result = zswap_writeback_entry(entry, swpentry);

	if (writeback_result) {
		zswap_reject_reclaim_fail++;
		ret = LRU_RETRY;

		/*
		 * Encountering a page already in swap cache is a sign that we are shrinking
		 * into the warmer region. We should terminate shrinking (if we're in the dynamic
		 * shrinker context).
		 */
		if (writeback_result == -EEXIST && encountered_page_in_swapcache)
			*encountered_page_in_swapcache = true;
	} else {
		zswap_written_back_pages++;
	}

	spin_lock(lock);
	return ret;
}

/*
 * Walk each NUMA node's LRU for @memcg in the current pool and try to
 * write back one entry per node.  Returns 0 if anything was reclaimed,
 * -EINVAL if writeback is disabled or no pool exists, -ENOENT for an
 * offline (zombie) memcg, -EAGAIN if nothing could be reclaimed.
 */
static int shrink_memcg(struct mem_cgroup *memcg)
{
	struct zswap_pool *pool;
	int nid, shrunk = 0;

	if (!mem_cgroup_zswap_writeback_enabled(memcg))
		return -EINVAL;

	/*
	 * Skip zombies because their LRUs are reparented and we would be
	 * reclaiming from the parent instead of the dead memcg.
	 */
	if (memcg && !mem_cgroup_online(memcg))
		return -ENOENT;

	pool = zswap_pool_current_get();
	if (!pool)
		return -EINVAL;

	for_each_node_state(nid, N_NORMAL_MEMORY) {
		unsigned long nr_to_walk = 1;

		shrunk += list_lru_walk_one(&pool->list_lru, nid, memcg,
					    &shrink_memcg_cb, NULL, &nr_to_walk);
	}
	zswap_pool_put(pool);
	return shrunk ? 0 : -EAGAIN;
}

/*
 * Workqueue worker: shrink the pool by writing entries back until the
 * pool can accept new stores again, iterating memcgs round-robin and
 * giving up after MAX_RECLAIM_RETRIES consecutive failures.
 */
static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);
	struct mem_cgroup *memcg;
	int ret, failures = 0;

	/* global reclaim will select cgroup in a round-robin fashion. */
	do {
		spin_lock(&zswap_pools_lock);
		pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
		memcg = pool->next_shrink;

		/*
		 * We need to retry if we have gone through a full round trip, or if we
		 * got an offline memcg (or else we risk undoing the effect of the
		 * zswap memcg offlining cleanup callback). This is not catastrophic
		 * per se, but it will keep the now offlined memcg hostage for a while.
		 *
		 * Note that if we got an online memcg, we will keep the extra
		 * reference in case the original reference obtained by mem_cgroup_iter
		 * is dropped by the zswap memcg offlining callback, ensuring that the
		 * memcg is not killed when we are reclaiming.
		 */
		if (!memcg) {
			spin_unlock(&zswap_pools_lock);
			if (++failures == MAX_RECLAIM_RETRIES)
				break;

			goto resched;
		}

		if (!mem_cgroup_tryget_online(memcg)) {
			/* drop the reference from mem_cgroup_iter() */
			mem_cgroup_iter_break(NULL, memcg);
			pool->next_shrink = NULL;
			spin_unlock(&zswap_pools_lock);

			if (++failures == MAX_RECLAIM_RETRIES)
				break;

			goto resched;
		}
		spin_unlock(&zswap_pools_lock);

		ret = shrink_memcg(memcg);
		/* drop the extra reference */
		mem_cgroup_put(memcg);

		if (ret == -EINVAL)
			break;
		if (ret && ++failures == MAX_RECLAIM_RETRIES)
			break;

resched:
		cond_resched();
	} while (!zswap_can_accept());
	zswap_pool_put(pool);
}

/*
 * Compress @folio's single page into a freshly allocated zpool object
 * and record the handle/length in @entry.  Returns true on success.
 * Serialized per-CPU via acomp_ctx->mutex, which also protects the
 * acomp_ctx->buffer used as the compression destination.
 */
static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
{
	struct crypto_acomp_ctx *acomp_ctx;
	struct scatterlist input, output;
	unsigned int dlen = PAGE_SIZE;
	unsigned long handle;
	struct zpool *zpool;
	char *buf;
	gfp_t gfp;
	int ret;
	u8 *dst;

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(&acomp_ctx->mutex);

	dst = acomp_ctx->buffer;
	sg_init_table(&input, 1);
	sg_set_page(&input, &folio->page, PAGE_SIZE, 0);

	/*
	 * We need PAGE_SIZE * 2 here since there may be an over-compression
	 * case, and hardware accelerators may not check the dst buffer size,
	 * so give the dst buffer enough length to avoid a buffer overflow.
	 */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);

	/*
	 * It may look a little silly that we send an asynchronous request and
	 * then wait for its completion synchronously, making the process
	 * synchronous in fact.
	 * Theoretically, acomp supports sending multiple requests on one acomp
	 * instance and having them completed simultaneously.  But in this
	 * case, zswap actually stores and loads page by page; there is no
	 * existing method to send the second page before the first page is
	 * done in one thread doing zswap.
	 * But in different threads running on different CPUs, we have
	 * different acomp instances, so multiple threads can do
	 * (de)compression in parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;
	if (ret) {
		zswap_reject_compress_fail++;
		goto unlock;
	}

	zpool = zswap_find_zpool(entry);
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(zpool, dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto unlock;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto unlock;
	}

	buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, dst, dlen);
	zpool_unmap_handle(zpool, handle);

	entry->handle = handle;
	entry->length = dlen;

unlock:
	mutex_unlock(&acomp_ctx->mutex);
	return ret == 0;
}

/*
 * Decompress @entry's zpool object into @page.  If the zpool mapping
 * cannot be held across a sleeping decompression, the compressed data
 * is first copied into the per-CPU bounce buffer (protected by
 * acomp_ctx->mutex).  Decompression failure is a bug: the data was
 * produced by zswap_compress() and must round-trip to PAGE_SIZE.
 */
static void zswap_decompress(struct zswap_entry *entry, struct page *page)
{
	struct zpool *zpool = zswap_find_zpool(entry);
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src;

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(&acomp_ctx->mutex);

	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
	if (!zpool_can_sleep_mapped(zpool)) {
		memcpy(acomp_ctx->buffer, src, entry->length);
		src = acomp_ctx->buffer;
		zpool_unmap_handle(zpool, entry->handle);
	}

	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
	mutex_unlock(&acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(zpool))
		zpool_unmap_handle(zpool, entry->handle);
}

/*********************************
14262b281117SSeth Jennings * writeback code 14272b281117SSeth Jennings **********************************/ 14282b281117SSeth Jennings /* 142996c7b0b4SMatthew Wilcox (Oracle) * Attempts to free an entry by adding a folio to the swap cache, 143096c7b0b4SMatthew Wilcox (Oracle) * decompressing the entry data into the folio, and issuing a 143196c7b0b4SMatthew Wilcox (Oracle) * bio write to write the folio back to the swap device. 14322b281117SSeth Jennings * 143396c7b0b4SMatthew Wilcox (Oracle) * This can be thought of as a "resumed writeback" of the folio 14342b281117SSeth Jennings * to the swap device. We are basically resuming the same swap 143542c06a0eSJohannes Weiner * writeback path that was intercepted with the zswap_store() 143696c7b0b4SMatthew Wilcox (Oracle) * in the first place. After the folio has been decompressed into 14372b281117SSeth Jennings * the swap cache, the compressed version stored by zswap can be 14382b281117SSeth Jennings * freed. 14392b281117SSeth Jennings */ 14400bb48849SDomenico Cerasuolo static int zswap_writeback_entry(struct zswap_entry *entry, 14415878303cSChengming Zhou swp_entry_t swpentry) 14422b281117SSeth Jennings { 14435878303cSChengming Zhou struct zswap_tree *tree; 144496c7b0b4SMatthew Wilcox (Oracle) struct folio *folio; 1445ddc1a5cbSHugh Dickins struct mempolicy *mpol; 144696c7b0b4SMatthew Wilcox (Oracle) bool folio_was_allocated; 14472b281117SSeth Jennings struct writeback_control wbc = { 14482b281117SSeth Jennings .sync_mode = WB_SYNC_NONE, 14492b281117SSeth Jennings }; 14502b281117SSeth Jennings 145196c7b0b4SMatthew Wilcox (Oracle) /* try to allocate swap cache folio */ 1452ddc1a5cbSHugh Dickins mpol = get_task_policy(current); 145396c7b0b4SMatthew Wilcox (Oracle) folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, 145496c7b0b4SMatthew Wilcox (Oracle) NO_INTERLEAVE_INDEX, &folio_was_allocated, true); 145596c7b0b4SMatthew Wilcox (Oracle) if (!folio) 1456e947ba0bSChengming Zhou return -ENOMEM; 14572b281117SSeth 
Jennings 1458e947ba0bSChengming Zhou /* 14595878303cSChengming Zhou * Found an existing folio, we raced with swapin or concurrent 14605878303cSChengming Zhou * shrinker. We generally writeback cold folios from zswap, and 14615878303cSChengming Zhou * swapin means the folio just became hot, so skip this folio. 14625878303cSChengming Zhou * For unlikely concurrent shrinker case, it will be unlinked 14635878303cSChengming Zhou * and freed when invalidated by the concurrent shrinker anyway. 1464e947ba0bSChengming Zhou */ 146596c7b0b4SMatthew Wilcox (Oracle) if (!folio_was_allocated) { 146696c7b0b4SMatthew Wilcox (Oracle) folio_put(folio); 1467e947ba0bSChengming Zhou return -EEXIST; 146898804a94SJohannes Weiner } 14692b281117SSeth Jennings 147004fc7816SDomenico Cerasuolo /* 147196c7b0b4SMatthew Wilcox (Oracle) * folio is locked, and the swapcache is now secured against 147298804a94SJohannes Weiner * concurrent swapping to and from the slot. Verify that the 147398804a94SJohannes Weiner * swap entry hasn't been invalidated and recycled behind our 147498804a94SJohannes Weiner * backs (our zswap_entry reference doesn't prevent that), to 147596c7b0b4SMatthew Wilcox (Oracle) * avoid overwriting a new swap folio with old compressed data. 147604fc7816SDomenico Cerasuolo */ 14775878303cSChengming Zhou tree = swap_zswap_tree(swpentry); 147804fc7816SDomenico Cerasuolo spin_lock(&tree->lock); 14795878303cSChengming Zhou if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) { 148004fc7816SDomenico Cerasuolo spin_unlock(&tree->lock); 148196c7b0b4SMatthew Wilcox (Oracle) delete_from_swap_cache(folio); 1482e3b63e96SYosry Ahmed folio_unlock(folio); 1483e3b63e96SYosry Ahmed folio_put(folio); 1484e947ba0bSChengming Zhou return -ENOMEM; 148504fc7816SDomenico Cerasuolo } 14865878303cSChengming Zhou 14875878303cSChengming Zhou /* Safe to deref entry after the entry is verified above. 
*/ 14885878303cSChengming Zhou zswap_entry_get(entry); 148904fc7816SDomenico Cerasuolo spin_unlock(&tree->lock); 149004fc7816SDomenico Cerasuolo 1491ff2972aaSJohannes Weiner zswap_decompress(entry, &folio->page); 14922b281117SSeth Jennings 14935878303cSChengming Zhou count_vm_event(ZSWPWB); 14945878303cSChengming Zhou if (entry->objcg) 14955878303cSChengming Zhou count_objcg_event(entry->objcg, ZSWPWB); 14965878303cSChengming Zhou 14975878303cSChengming Zhou spin_lock(&tree->lock); 14985878303cSChengming Zhou zswap_invalidate_entry(tree, entry); 14995878303cSChengming Zhou zswap_entry_put(entry); 15005878303cSChengming Zhou spin_unlock(&tree->lock); 15015878303cSChengming Zhou 150296c7b0b4SMatthew Wilcox (Oracle) /* folio is up to date */ 150396c7b0b4SMatthew Wilcox (Oracle) folio_mark_uptodate(folio); 15042b281117SSeth Jennings 1505b349acc7SWeijie Yang /* move it to the tail of the inactive list after end_writeback */ 150696c7b0b4SMatthew Wilcox (Oracle) folio_set_reclaim(folio); 1507b349acc7SWeijie Yang 15082b281117SSeth Jennings /* start writeback */ 1509b99b4e0dSMatthew Wilcox (Oracle) __swap_writepage(folio, &wbc); 151096c7b0b4SMatthew Wilcox (Oracle) folio_put(folio); 15112b281117SSeth Jennings 1512e947ba0bSChengming Zhou return 0; 15132b281117SSeth Jennings } 15142b281117SSeth Jennings 1515a85f878bSSrividya Desireddy static int zswap_is_page_same_filled(void *ptr, unsigned long *value) 1516a85f878bSSrividya Desireddy { 1517a85f878bSSrividya Desireddy unsigned long *page; 151862bf1258STaejoon Song unsigned long val; 151962bf1258STaejoon Song unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 1520a85f878bSSrividya Desireddy 1521a85f878bSSrividya Desireddy page = (unsigned long *)ptr; 152262bf1258STaejoon Song val = page[0]; 152362bf1258STaejoon Song 152462bf1258STaejoon Song if (val != page[last_pos]) 152562bf1258STaejoon Song return 0; 152662bf1258STaejoon Song 152762bf1258STaejoon Song for (pos = 1; pos < last_pos; pos++) { 152862bf1258STaejoon Song 
if (val != page[pos]) 1529a85f878bSSrividya Desireddy return 0; 1530a85f878bSSrividya Desireddy } 153162bf1258STaejoon Song 153262bf1258STaejoon Song *value = val; 153362bf1258STaejoon Song 1534a85f878bSSrividya Desireddy return 1; 1535a85f878bSSrividya Desireddy } 1536a85f878bSSrividya Desireddy 1537a85f878bSSrividya Desireddy static void zswap_fill_page(void *ptr, unsigned long value) 1538a85f878bSSrividya Desireddy { 1539a85f878bSSrividya Desireddy unsigned long *page; 1540a85f878bSSrividya Desireddy 1541a85f878bSSrividya Desireddy page = (unsigned long *)ptr; 1542a85f878bSSrividya Desireddy memset_l(page, value, PAGE_SIZE / sizeof(unsigned long)); 1543a85f878bSSrividya Desireddy } 1544a85f878bSSrividya Desireddy 154534f4c198SMatthew Wilcox (Oracle) bool zswap_store(struct folio *folio) 15462b281117SSeth Jennings { 15473d2c9087SDavid Hildenbrand swp_entry_t swp = folio->swap; 154842c06a0eSJohannes Weiner pgoff_t offset = swp_offset(swp); 154944c7c734SChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp); 15502b281117SSeth Jennings struct zswap_entry *entry, *dupentry; 1551f4840ccfSJohannes Weiner struct obj_cgroup *objcg = NULL; 1552a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg = NULL; 1553be7fc97cSJohannes Weiner struct zswap_pool *shrink_pool; 155442c06a0eSJohannes Weiner 155534f4c198SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_locked(folio)); 155634f4c198SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); 15572b281117SSeth Jennings 155834f4c198SMatthew Wilcox (Oracle) /* Large folios aren't supported */ 155934f4c198SMatthew Wilcox (Oracle) if (folio_test_large(folio)) 156042c06a0eSJohannes Weiner return false; 15617ba71669SHuang Ying 15620bdf0efaSNhat Pham /* 1563ca56489cSDomenico Cerasuolo * If this is a duplicate, it must be removed before attempting to store 1564ca56489cSDomenico Cerasuolo * it, otherwise, if the store fails the old page won't be removed from 1565ca56489cSDomenico Cerasuolo * the tree, and it 
might be written back overriding the new data. 1566ca56489cSDomenico Cerasuolo */ 1567ca56489cSDomenico Cerasuolo spin_lock(&tree->lock); 1568be7fc97cSJohannes Weiner entry = zswap_rb_search(&tree->rbroot, offset); 1569be7fc97cSJohannes Weiner if (entry) { 1570be7fc97cSJohannes Weiner zswap_invalidate_entry(tree, entry); 1571ca56489cSDomenico Cerasuolo zswap_duplicate_entry++; 1572ca56489cSDomenico Cerasuolo } 1573ca56489cSDomenico Cerasuolo spin_unlock(&tree->lock); 1574678e54d4SChengming Zhou 1575678e54d4SChengming Zhou if (!zswap_enabled) 1576678e54d4SChengming Zhou return false; 1577678e54d4SChengming Zhou 1578074e3e26SMatthew Wilcox (Oracle) objcg = get_obj_cgroup_from_folio(folio); 1579a65b0e76SDomenico Cerasuolo if (objcg && !obj_cgroup_may_zswap(objcg)) { 1580a65b0e76SDomenico Cerasuolo memcg = get_mem_cgroup_from_objcg(objcg); 1581a65b0e76SDomenico Cerasuolo if (shrink_memcg(memcg)) { 1582a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 15830bdf0efaSNhat Pham goto reject; 1584a65b0e76SDomenico Cerasuolo } 1585a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1586a65b0e76SDomenico Cerasuolo } 1587f4840ccfSJohannes Weiner 15882b281117SSeth Jennings /* reclaim space if needed */ 15892b281117SSeth Jennings if (zswap_is_full()) { 15902b281117SSeth Jennings zswap_pool_limit_hit++; 159145190f01SVitaly Wool zswap_pool_reached_full = true; 1592f4840ccfSJohannes Weiner goto shrink; 15932b281117SSeth Jennings } 159416e536efSLi Wang 159545190f01SVitaly Wool if (zswap_pool_reached_full) { 159642c06a0eSJohannes Weiner if (!zswap_can_accept()) 1597e0228d59SDomenico Cerasuolo goto shrink; 159842c06a0eSJohannes Weiner else 159945190f01SVitaly Wool zswap_pool_reached_full = false; 16002b281117SSeth Jennings } 16012b281117SSeth Jennings 16022b281117SSeth Jennings /* allocate entry */ 1603be7fc97cSJohannes Weiner entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio)); 16042b281117SSeth Jennings if (!entry) { 16052b281117SSeth Jennings zswap_reject_kmemcache_fail++; 
16062b281117SSeth Jennings goto reject; 16072b281117SSeth Jennings } 16082b281117SSeth Jennings 1609a85f878bSSrividya Desireddy if (zswap_same_filled_pages_enabled) { 1610be7fc97cSJohannes Weiner unsigned long value; 1611be7fc97cSJohannes Weiner u8 *src; 1612be7fc97cSJohannes Weiner 1613be7fc97cSJohannes Weiner src = kmap_local_folio(folio, 0); 1614a85f878bSSrividya Desireddy if (zswap_is_page_same_filled(src, &value)) { 1615003ae2fbSFabio M. De Francesco kunmap_local(src); 1616a85f878bSSrividya Desireddy entry->length = 0; 1617a85f878bSSrividya Desireddy entry->value = value; 1618a85f878bSSrividya Desireddy atomic_inc(&zswap_same_filled_pages); 1619a85f878bSSrividya Desireddy goto insert_entry; 1620a85f878bSSrividya Desireddy } 1621003ae2fbSFabio M. De Francesco kunmap_local(src); 1622a85f878bSSrividya Desireddy } 1623a85f878bSSrividya Desireddy 162442c06a0eSJohannes Weiner if (!zswap_non_same_filled_pages_enabled) 1625cb325dddSMaciej S. Szmigiero goto freepage; 1626cb325dddSMaciej S. Szmigiero 1627f1c54846SDan Streetman /* if entry is successfully added, it keeps the reference */ 1628f1c54846SDan Streetman entry->pool = zswap_pool_current_get(); 162942c06a0eSJohannes Weiner if (!entry->pool) 16302b281117SSeth Jennings goto freepage; 16312b281117SSeth Jennings 1632a65b0e76SDomenico Cerasuolo if (objcg) { 1633a65b0e76SDomenico Cerasuolo memcg = get_mem_cgroup_from_objcg(objcg); 1634a65b0e76SDomenico Cerasuolo if (memcg_list_lru_alloc(memcg, &entry->pool->list_lru, GFP_KERNEL)) { 1635a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1636a65b0e76SDomenico Cerasuolo goto put_pool; 1637a65b0e76SDomenico Cerasuolo } 1638a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1639a65b0e76SDomenico Cerasuolo } 1640a65b0e76SDomenico Cerasuolo 1641fa9ad6e2SJohannes Weiner if (!zswap_compress(folio, entry)) 1642fa9ad6e2SJohannes Weiner goto put_pool; 16431ec3b5feSBarry Song 1644a85f878bSSrividya Desireddy insert_entry: 1645be7fc97cSJohannes Weiner entry->swpentry = swp; 
1646f4840ccfSJohannes Weiner entry->objcg = objcg; 1647f4840ccfSJohannes Weiner if (objcg) { 1648f4840ccfSJohannes Weiner obj_cgroup_charge_zswap(objcg, entry->length); 1649f4840ccfSJohannes Weiner /* Account before objcg ref is moved to tree */ 1650f4840ccfSJohannes Weiner count_objcg_event(objcg, ZSWPOUT); 1651f4840ccfSJohannes Weiner } 1652f4840ccfSJohannes Weiner 16532b281117SSeth Jennings /* map */ 16542b281117SSeth Jennings spin_lock(&tree->lock); 1655ca56489cSDomenico Cerasuolo /* 1656ca56489cSDomenico Cerasuolo * A duplicate entry should have been removed at the beginning of this 1657ca56489cSDomenico Cerasuolo * function. Since the swap entry should be pinned, if a duplicate is 1658ca56489cSDomenico Cerasuolo * found again here it means that something went wrong in the swap 1659ca56489cSDomenico Cerasuolo * cache. 1660ca56489cSDomenico Cerasuolo */ 166142c06a0eSJohannes Weiner while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) { 1662ca56489cSDomenico Cerasuolo WARN_ON(1); 16632b281117SSeth Jennings zswap_duplicate_entry++; 166456c67049SJohannes Weiner zswap_invalidate_entry(tree, dupentry); 16652b281117SSeth Jennings } 166635499e2bSDomenico Cerasuolo if (entry->length) { 1667a65b0e76SDomenico Cerasuolo INIT_LIST_HEAD(&entry->lru); 1668a65b0e76SDomenico Cerasuolo zswap_lru_add(&entry->pool->list_lru, entry); 1669b5ba474fSNhat Pham atomic_inc(&entry->pool->nr_stored); 1670f999f38bSDomenico Cerasuolo } 16712b281117SSeth Jennings spin_unlock(&tree->lock); 16722b281117SSeth Jennings 16732b281117SSeth Jennings /* update stats */ 16742b281117SSeth Jennings atomic_inc(&zswap_stored_pages); 1675f1c54846SDan Streetman zswap_update_total_size(); 1676f6498b77SJohannes Weiner count_vm_event(ZSWPOUT); 16772b281117SSeth Jennings 167842c06a0eSJohannes Weiner return true; 16792b281117SSeth Jennings 1680a65b0e76SDomenico Cerasuolo put_pool: 1681f1c54846SDan Streetman zswap_pool_put(entry->pool); 1682f1c54846SDan Streetman freepage: 16832b281117SSeth 
Jennings zswap_entry_cache_free(entry); 16842b281117SSeth Jennings reject: 1685f4840ccfSJohannes Weiner if (objcg) 1686f4840ccfSJohannes Weiner obj_cgroup_put(objcg); 168742c06a0eSJohannes Weiner return false; 1688f4840ccfSJohannes Weiner 1689f4840ccfSJohannes Weiner shrink: 1690be7fc97cSJohannes Weiner shrink_pool = zswap_pool_last_get(); 1691be7fc97cSJohannes Weiner if (shrink_pool && !queue_work(shrink_wq, &shrink_pool->shrink_work)) 1692be7fc97cSJohannes Weiner zswap_pool_put(shrink_pool); 1693f4840ccfSJohannes Weiner goto reject; 16942b281117SSeth Jennings } 16952b281117SSeth Jennings 1696ca54f6d8SMatthew Wilcox (Oracle) bool zswap_load(struct folio *folio) 16972b281117SSeth Jennings { 16983d2c9087SDavid Hildenbrand swp_entry_t swp = folio->swap; 169942c06a0eSJohannes Weiner pgoff_t offset = swp_offset(swp); 1700ca54f6d8SMatthew Wilcox (Oracle) struct page *page = &folio->page; 170144c7c734SChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp); 17022b281117SSeth Jennings struct zswap_entry *entry; 170332acba4cSChengming Zhou u8 *dst; 170442c06a0eSJohannes Weiner 1705ca54f6d8SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_locked(folio)); 17062b281117SSeth Jennings 17072b281117SSeth Jennings spin_lock(&tree->lock); 17085b297f70SJohannes Weiner entry = zswap_rb_search(&tree->rbroot, offset); 17092b281117SSeth Jennings if (!entry) { 17102b281117SSeth Jennings spin_unlock(&tree->lock); 171142c06a0eSJohannes Weiner return false; 17122b281117SSeth Jennings } 17135b297f70SJohannes Weiner zswap_entry_get(entry); 17142b281117SSeth Jennings spin_unlock(&tree->lock); 17152b281117SSeth Jennings 171666447fd0SChengming Zhou if (entry->length) 1717ff2972aaSJohannes Weiner zswap_decompress(entry, page); 171866447fd0SChengming Zhou else { 1719003ae2fbSFabio M. De Francesco dst = kmap_local_page(page); 1720a85f878bSSrividya Desireddy zswap_fill_page(dst, entry->value); 1721003ae2fbSFabio M. 
De Francesco kunmap_local(dst); 1722a85f878bSSrividya Desireddy } 1723a85f878bSSrividya Desireddy 1724f6498b77SJohannes Weiner count_vm_event(ZSWPIN); 1725f4840ccfSJohannes Weiner if (entry->objcg) 1726f4840ccfSJohannes Weiner count_objcg_event(entry->objcg, ZSWPIN); 1727c75f5c1eSChengming Zhou 17282b281117SSeth Jennings spin_lock(&tree->lock); 172966447fd0SChengming Zhou if (zswap_exclusive_loads_enabled) { 1730b9c91c43SYosry Ahmed zswap_invalidate_entry(tree, entry); 1731ca54f6d8SMatthew Wilcox (Oracle) folio_mark_dirty(folio); 173235499e2bSDomenico Cerasuolo } else if (entry->length) { 1733a65b0e76SDomenico Cerasuolo zswap_lru_del(&entry->pool->list_lru, entry); 1734a65b0e76SDomenico Cerasuolo zswap_lru_add(&entry->pool->list_lru, entry); 1735b9c91c43SYosry Ahmed } 1736db128f5fSYosry Ahmed zswap_entry_put(entry); 17372b281117SSeth Jennings spin_unlock(&tree->lock); 17382b281117SSeth Jennings 173966447fd0SChengming Zhou return true; 17402b281117SSeth Jennings } 17412b281117SSeth Jennings 174242c06a0eSJohannes Weiner void zswap_invalidate(int type, pgoff_t offset) 17432b281117SSeth Jennings { 174444c7c734SChengming Zhou struct zswap_tree *tree = swap_zswap_tree(swp_entry(type, offset)); 17452b281117SSeth Jennings struct zswap_entry *entry; 17462b281117SSeth Jennings 17472b281117SSeth Jennings spin_lock(&tree->lock); 17482b281117SSeth Jennings entry = zswap_rb_search(&tree->rbroot, offset); 174906ed2289SJohannes Weiner if (entry) 1750b9c91c43SYosry Ahmed zswap_invalidate_entry(tree, entry); 17512b281117SSeth Jennings spin_unlock(&tree->lock); 17522b281117SSeth Jennings } 17532b281117SSeth Jennings 175444c7c734SChengming Zhou int zswap_swapon(int type, unsigned long nr_pages) 175542c06a0eSJohannes Weiner { 175644c7c734SChengming Zhou struct zswap_tree *trees, *tree; 175744c7c734SChengming Zhou unsigned int nr, i; 175842c06a0eSJohannes Weiner 175944c7c734SChengming Zhou nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); 176044c7c734SChengming Zhou trees = 
kvcalloc(nr, sizeof(*tree), GFP_KERNEL); 176144c7c734SChengming Zhou if (!trees) { 176242c06a0eSJohannes Weiner pr_err("alloc failed, zswap disabled for swap type %d\n", type); 1763bb29fd77SChengming Zhou return -ENOMEM; 176442c06a0eSJohannes Weiner } 176542c06a0eSJohannes Weiner 176644c7c734SChengming Zhou for (i = 0; i < nr; i++) { 176744c7c734SChengming Zhou tree = trees + i; 176842c06a0eSJohannes Weiner tree->rbroot = RB_ROOT; 176942c06a0eSJohannes Weiner spin_lock_init(&tree->lock); 177044c7c734SChengming Zhou } 177144c7c734SChengming Zhou 177244c7c734SChengming Zhou nr_zswap_trees[type] = nr; 177344c7c734SChengming Zhou zswap_trees[type] = trees; 1774bb29fd77SChengming Zhou return 0; 177542c06a0eSJohannes Weiner } 177642c06a0eSJohannes Weiner 177742c06a0eSJohannes Weiner void zswap_swapoff(int type) 17782b281117SSeth Jennings { 177944c7c734SChengming Zhou struct zswap_tree *trees = zswap_trees[type]; 178044c7c734SChengming Zhou unsigned int i; 17812b281117SSeth Jennings 178244c7c734SChengming Zhou if (!trees) 17832b281117SSeth Jennings return; 17842b281117SSeth Jennings 178583e68f25SYosry Ahmed /* try_to_unuse() invalidated all the entries already */ 178683e68f25SYosry Ahmed for (i = 0; i < nr_zswap_trees[type]; i++) 178783e68f25SYosry Ahmed WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot)); 178844c7c734SChengming Zhou 178944c7c734SChengming Zhou kvfree(trees); 179044c7c734SChengming Zhou nr_zswap_trees[type] = 0; 1791aa9bca05SWeijie Yang zswap_trees[type] = NULL; 17922b281117SSeth Jennings } 17932b281117SSeth Jennings 17942b281117SSeth Jennings /********************************* 17952b281117SSeth Jennings * debugfs functions 17962b281117SSeth Jennings **********************************/ 17972b281117SSeth Jennings #ifdef CONFIG_DEBUG_FS 17982b281117SSeth Jennings #include <linux/debugfs.h> 17992b281117SSeth Jennings 18002b281117SSeth Jennings static struct dentry *zswap_debugfs_root; 18012b281117SSeth Jennings 1802141fdeecSLiu Shixin static int 
zswap_debugfs_init(void)
{
	/*
	 * Publish zswap statistics as read-only (0444) files under
	 * <debugfs>/zswap.  The debugfs_create_* return values are not
	 * checked: debugfs is optional and these calls degrade to no-ops
	 * when creation fails.
	 */
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);

	debugfs_create_u64("pool_limit_hit", 0444,
			   zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_fail);
	debugfs_create_u64("reject_compress_poor", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", 0444,
			   zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", 0444,
			   zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", 0444,
			   zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", 0444,
				zswap_debugfs_root, &zswap_stored_pages);
	debugfs_create_atomic_t("same_filled_pages", 0444,
				zswap_debugfs_root, &zswap_same_filled_pages);

	return 0;
}
18342b281117SSeth Jennings #else 1835141fdeecSLiu Shixin static int zswap_debugfs_init(void) 18362b281117SSeth Jennings { 18372b281117SSeth Jennings return 0; 18382b281117SSeth Jennings } 18392b281117SSeth Jennings #endif 18402b281117SSeth Jennings 18412b281117SSeth Jennings /********************************* 18422b281117SSeth Jennings * module init and exit 18432b281117SSeth Jennings **********************************/ 1844141fdeecSLiu Shixin static int zswap_setup(void) 18452b281117SSeth Jennings { 1846f1c54846SDan Streetman struct zswap_pool *pool; 1847ad7ed770SSebastian Andrzej Siewior int ret; 184860105e12SMinchan Kim 1849b7919122SLiu Shixin zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); 1850b7919122SLiu Shixin if (!zswap_entry_cache) { 18512b281117SSeth Jennings pr_err("entry cache creation failed\n"); 1852f1c54846SDan Streetman goto cache_fail; 18532b281117SSeth Jennings } 1854f1c54846SDan Streetman 1855cab7a7e5SSebastian Andrzej Siewior ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, 1856cab7a7e5SSebastian Andrzej Siewior "mm/zswap_pool:prepare", 1857cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_prepare, 1858cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_dead); 1859cab7a7e5SSebastian Andrzej Siewior if (ret) 1860cab7a7e5SSebastian Andrzej Siewior goto hp_fail; 1861cab7a7e5SSebastian Andrzej Siewior 1862f1c54846SDan Streetman pool = __zswap_pool_create_fallback(); 1863ae3d89a7SDan Streetman if (pool) { 1864f1c54846SDan Streetman pr_info("loaded using pool %s/%s\n", pool->tfm_name, 1865b8cf32dcSYosry Ahmed zpool_get_type(pool->zpools[0])); 1866f1c54846SDan Streetman list_add(&pool->list, &zswap_pools); 1867ae3d89a7SDan Streetman zswap_has_pool = true; 1868ae3d89a7SDan Streetman } else { 1869ae3d89a7SDan Streetman pr_err("pool creation failed\n"); 1870ae3d89a7SDan Streetman zswap_enabled = false; 1871ae3d89a7SDan Streetman } 187260105e12SMinchan Kim 18738409a385SRonald Monthero shrink_wq = alloc_workqueue("zswap-shrink", 18748409a385SRonald 
Monthero WQ_UNBOUND|WQ_MEM_RECLAIM, 1); 187545190f01SVitaly Wool if (!shrink_wq) 187645190f01SVitaly Wool goto fallback_fail; 187745190f01SVitaly Wool 18782b281117SSeth Jennings if (zswap_debugfs_init()) 18792b281117SSeth Jennings pr_warn("debugfs initialization failed\n"); 18809021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_SUCCEED; 18812b281117SSeth Jennings return 0; 1882f1c54846SDan Streetman 188345190f01SVitaly Wool fallback_fail: 188438aeb071SDan Carpenter if (pool) 188545190f01SVitaly Wool zswap_pool_destroy(pool); 1886cab7a7e5SSebastian Andrzej Siewior hp_fail: 1887b7919122SLiu Shixin kmem_cache_destroy(zswap_entry_cache); 1888f1c54846SDan Streetman cache_fail: 1889d7b028f5SDan Streetman /* if built-in, we aren't unloaded on failure; don't allow use */ 18909021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_FAILED; 1891d7b028f5SDan Streetman zswap_enabled = false; 18922b281117SSeth Jennings return -ENOMEM; 18932b281117SSeth Jennings } 1894141fdeecSLiu Shixin 1895141fdeecSLiu Shixin static int __init zswap_init(void) 1896141fdeecSLiu Shixin { 1897141fdeecSLiu Shixin if (!zswap_enabled) 1898141fdeecSLiu Shixin return 0; 1899141fdeecSLiu Shixin return zswap_setup(); 1900141fdeecSLiu Shixin } 19012b281117SSeth Jennings /* must be late so crypto has time to come up */ 1902141fdeecSLiu Shixin late_initcall(zswap_init); 19032b281117SSeth Jennings 190468386da8SSeth Jennings MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>"); 19052b281117SSeth Jennings MODULE_DESCRIPTION("Compressed cache for swap pages"); 1906