1c942fddfSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 22b281117SSeth Jennings /* 32b281117SSeth Jennings * zswap.c - zswap driver file 42b281117SSeth Jennings * 542c06a0eSJohannes Weiner * zswap is a cache that takes pages that are in the process 62b281117SSeth Jennings * of being swapped out and attempts to compress and store them in a 72b281117SSeth Jennings * RAM-based memory pool. This can result in a significant I/O reduction on 82b281117SSeth Jennings * the swap device and, in the case where decompressing from RAM is faster 92b281117SSeth Jennings * than reading from the swap device, can also improve workload performance. 102b281117SSeth Jennings * 112b281117SSeth Jennings * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> 122b281117SSeth Jennings */ 132b281117SSeth Jennings 142b281117SSeth Jennings #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 152b281117SSeth Jennings 162b281117SSeth Jennings #include <linux/module.h> 172b281117SSeth Jennings #include <linux/cpu.h> 182b281117SSeth Jennings #include <linux/highmem.h> 192b281117SSeth Jennings #include <linux/slab.h> 202b281117SSeth Jennings #include <linux/spinlock.h> 212b281117SSeth Jennings #include <linux/types.h> 222b281117SSeth Jennings #include <linux/atomic.h> 232b281117SSeth Jennings #include <linux/rbtree.h> 242b281117SSeth Jennings #include <linux/swap.h> 252b281117SSeth Jennings #include <linux/crypto.h> 261ec3b5feSBarry Song #include <linux/scatterlist.h> 27ddc1a5cbSHugh Dickins #include <linux/mempolicy.h> 282b281117SSeth Jennings #include <linux/mempool.h> 2912d79d64SDan Streetman #include <linux/zpool.h> 301ec3b5feSBarry Song #include <crypto/acompress.h> 3142c06a0eSJohannes Weiner #include <linux/zswap.h> 322b281117SSeth Jennings #include <linux/mm_types.h> 332b281117SSeth Jennings #include <linux/page-flags.h> 342b281117SSeth Jennings #include <linux/swapops.h> 352b281117SSeth Jennings #include <linux/writeback.h> 362b281117SSeth Jennings #include <linux/pagemap.h> 3745190f01SVitaly Wool #include <linux/workqueue.h> 38a65b0e76SDomenico Cerasuolo #include <linux/list_lru.h> 392b281117SSeth Jennings 40014bb1deSNeilBrown #include "swap.h" 41e0228d59SDomenico Cerasuolo #include "internal.h" 42014bb1deSNeilBrown 432b281117SSeth Jennings /********************************* 442b281117SSeth Jennings * statistics 452b281117SSeth Jennings **********************************/ 4612d79d64SDan Streetman /* Total bytes used by the compressed storage */ 47f6498b77SJohannes Weiner u64 zswap_pool_total_size; 482b281117SSeth Jennings /* The number of compressed pages currently stored in zswap */ 49f6498b77SJohannes Weiner atomic_t zswap_stored_pages = ATOMIC_INIT(0); 50a85f878bSSrividya Desireddy /* The number of same-value filled pages currently stored in zswap */ 51a85f878bSSrividya Desireddy static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0); 522b281117SSeth Jennings 532b281117SSeth Jennings /* 542b281117SSeth Jennings * The statistics below are not protected from concurrent access for 552b281117SSeth Jennings * performance reasons so they may not be a 100% accurate. However, 562b281117SSeth Jennings * they do provide useful information on roughly how many times a 572b281117SSeth Jennings * certain event is occurring. 582b281117SSeth Jennings */ 592b281117SSeth Jennings 602b281117SSeth Jennings /* Pool limit was hit (see zswap_max_pool_percent) */ 612b281117SSeth Jennings static u64 zswap_pool_limit_hit; 622b281117SSeth Jennings /* Pages written back when pool limit was reached */ 632b281117SSeth Jennings static u64 zswap_written_back_pages; 642b281117SSeth Jennings /* Store failed due to a reclaim failure after pool limit was reached */ 652b281117SSeth Jennings static u64 zswap_reject_reclaim_fail; 66cb61dad8SNhat Pham /* Store failed due to compression algorithm failure */ 67cb61dad8SNhat Pham static u64 zswap_reject_compress_fail; 682b281117SSeth Jennings /* Compressed page was too big for the allocator to (optimally) store */ 692b281117SSeth Jennings static u64 zswap_reject_compress_poor; 702b281117SSeth Jennings /* Store failed because underlying allocator could not get memory */ 712b281117SSeth Jennings static u64 zswap_reject_alloc_fail; 722b281117SSeth Jennings /* Store failed because the entry metadata could not be allocated (rare) */ 732b281117SSeth Jennings static u64 zswap_reject_kmemcache_fail; 742b281117SSeth Jennings /* Duplicate store was encountered (rare) */ 752b281117SSeth Jennings static u64 zswap_duplicate_entry; 762b281117SSeth Jennings 7745190f01SVitaly Wool /* Shrinker work queue */ 7845190f01SVitaly Wool static struct workqueue_struct *shrink_wq; 7945190f01SVitaly Wool /* Pool limit was hit, we need to calm down */ 8045190f01SVitaly Wool static bool zswap_pool_reached_full; 8145190f01SVitaly Wool 822b281117SSeth Jennings /********************************* 832b281117SSeth Jennings * tunables 842b281117SSeth Jennings **********************************/ 85c00ed16aSDan Streetman 86bae21db8SDan Streetman #define ZSWAP_PARAM_UNSET "" 87bae21db8SDan Streetman 88141fdeecSLiu Shixin static int zswap_setup(void); 89141fdeecSLiu Shixin 90bb8b93b5SMaciej S. Szmigiero /* Enable/disable zswap */ 91bb8b93b5SMaciej S. Szmigiero static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON); 92d7b028f5SDan Streetman static int zswap_enabled_param_set(const char *, 93d7b028f5SDan Streetman const struct kernel_param *); 9483aed6cdSJoe Perches static const struct kernel_param_ops zswap_enabled_param_ops = { 95d7b028f5SDan Streetman .set = zswap_enabled_param_set, 96d7b028f5SDan Streetman .get = param_get_bool, 97d7b028f5SDan Streetman }; 98d7b028f5SDan Streetman module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); 992b281117SSeth Jennings 10090b0fc26SDan Streetman /* Crypto compressor to use */ 101bb8b93b5SMaciej S. Szmigiero static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; 10290b0fc26SDan Streetman static int zswap_compressor_param_set(const char *, 10390b0fc26SDan Streetman const struct kernel_param *); 10483aed6cdSJoe Perches static const struct kernel_param_ops zswap_compressor_param_ops = { 10590b0fc26SDan Streetman .set = zswap_compressor_param_set, 106c99b42c3SDan Streetman .get = param_get_charp, 107c99b42c3SDan Streetman .free = param_free_charp, 10890b0fc26SDan Streetman }; 10990b0fc26SDan Streetman module_param_cb(compressor, &zswap_compressor_param_ops, 110c99b42c3SDan Streetman &zswap_compressor, 0644); 11190b0fc26SDan Streetman 11290b0fc26SDan Streetman /* Compressed storage zpool to use */ 113bb8b93b5SMaciej S. Szmigiero static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; 11490b0fc26SDan Streetman static int zswap_zpool_param_set(const char *, const struct kernel_param *); 11583aed6cdSJoe Perches static const struct kernel_param_ops zswap_zpool_param_ops = { 11690b0fc26SDan Streetman .set = zswap_zpool_param_set, 117c99b42c3SDan Streetman .get = param_get_charp, 118c99b42c3SDan Streetman .free = param_free_charp, 11990b0fc26SDan Streetman }; 120c99b42c3SDan Streetman module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644); 1212b281117SSeth Jennings 1222b281117SSeth Jennings /* The maximum percentage of memory that the compressed pool can occupy */ 1232b281117SSeth Jennings static unsigned int zswap_max_pool_percent = 20; 12490b0fc26SDan Streetman module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644); 12560105e12SMinchan Kim 12645190f01SVitaly Wool /* The threshold for accepting new pages after the max_pool_percent was hit */ 12745190f01SVitaly Wool static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ 12845190f01SVitaly Wool module_param_named(accept_threshold_percent, zswap_accept_thr_percent, 12945190f01SVitaly Wool uint, 0644); 13045190f01SVitaly Wool 131cb325dddSMaciej S. Szmigiero /* 132cb325dddSMaciej S. Szmigiero * Enable/disable handling same-value filled pages (enabled by default). 133cb325dddSMaciej S. Szmigiero * If disabled every page is considered non-same-value filled. 134cb325dddSMaciej S. Szmigiero */ 135a85f878bSSrividya Desireddy static bool zswap_same_filled_pages_enabled = true; 136a85f878bSSrividya Desireddy module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled, 137a85f878bSSrividya Desireddy bool, 0644); 138a85f878bSSrividya Desireddy 139cb325dddSMaciej S. Szmigiero /* Enable/disable handling non-same-value filled pages (enabled by default) */ 140cb325dddSMaciej S. Szmigiero static bool zswap_non_same_filled_pages_enabled = true; 141cb325dddSMaciej S. Szmigiero module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled, 142cb325dddSMaciej S. Szmigiero bool, 0644); 143cb325dddSMaciej S. Szmigiero 144b9c91c43SYosry Ahmed static bool zswap_exclusive_loads_enabled = IS_ENABLED( 145b9c91c43SYosry Ahmed CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON); 146b9c91c43SYosry Ahmed module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644); 147b9c91c43SYosry Ahmed 148b8cf32dcSYosry Ahmed /* Number of zpools in zswap_pool (empirically determined for scalability) */ 149b8cf32dcSYosry Ahmed #define ZSWAP_NR_ZPOOLS 32 150b8cf32dcSYosry Ahmed 151b5ba474fSNhat Pham /* Enable/disable memory pressure-based shrinker. */ 152b5ba474fSNhat Pham static bool zswap_shrinker_enabled = IS_ENABLED( 153b5ba474fSNhat Pham CONFIG_ZSWAP_SHRINKER_DEFAULT_ON); 154b5ba474fSNhat Pham module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644); 155b5ba474fSNhat Pham 156501a06feSNhat Pham bool is_zswap_enabled(void) 157501a06feSNhat Pham { 158501a06feSNhat Pham return zswap_enabled; 159501a06feSNhat Pham } 160501a06feSNhat Pham 1612b281117SSeth Jennings /********************************* 1622b281117SSeth Jennings * data structures 1632b281117SSeth Jennings **********************************/ 164f1c54846SDan Streetman 1651ec3b5feSBarry Song struct crypto_acomp_ctx { 1661ec3b5feSBarry Song struct crypto_acomp *acomp; 1671ec3b5feSBarry Song struct acomp_req *req; 1681ec3b5feSBarry Song struct crypto_wait wait; 1698ba2f844SChengming Zhou u8 *buffer; 1708ba2f844SChengming Zhou struct mutex mutex; 1711ec3b5feSBarry Song }; 1721ec3b5feSBarry Song 173f999f38bSDomenico Cerasuolo /* 174f999f38bSDomenico Cerasuolo * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock. 175f999f38bSDomenico Cerasuolo * The only case where lru_lock is not acquired while holding tree.lock is 176f999f38bSDomenico Cerasuolo * when a zswap_entry is taken off the lru for writeback, in that case it 177f999f38bSDomenico Cerasuolo * needs to be verified that it's still valid in the tree. 178f999f38bSDomenico Cerasuolo */ 179f1c54846SDan Streetman struct zswap_pool { 180b8cf32dcSYosry Ahmed struct zpool *zpools[ZSWAP_NR_ZPOOLS]; 1811ec3b5feSBarry Song struct crypto_acomp_ctx __percpu *acomp_ctx; 182f1c54846SDan Streetman struct kref kref; 183f1c54846SDan Streetman struct list_head list; 18445190f01SVitaly Wool struct work_struct release_work; 18545190f01SVitaly Wool struct work_struct shrink_work; 186cab7a7e5SSebastian Andrzej Siewior struct hlist_node node; 187f1c54846SDan Streetman char tfm_name[CRYPTO_MAX_ALG_NAME]; 188a65b0e76SDomenico Cerasuolo struct list_lru list_lru; 189a65b0e76SDomenico Cerasuolo struct mem_cgroup *next_shrink; 190b5ba474fSNhat Pham struct shrinker *shrinker; 191b5ba474fSNhat Pham atomic_t nr_stored; 192f1c54846SDan Streetman }; 193f1c54846SDan Streetman 1942b281117SSeth Jennings /* 1952b281117SSeth Jennings * struct zswap_entry 1962b281117SSeth Jennings * 1972b281117SSeth Jennings * This structure contains the metadata for tracking a single compressed 1982b281117SSeth Jennings * page within zswap. 1992b281117SSeth Jennings * 2002b281117SSeth Jennings * rbnode - links the entry into red-black tree for the appropriate swap type 20197157d89SXiu Jianfeng * swpentry - associated swap entry, the offset indexes into the red-black tree 2022b281117SSeth Jennings * refcount - the number of outstanding reference to the entry. This is needed 2032b281117SSeth Jennings * to protect against premature freeing of the entry by code 2046b452516SSeongJae Park * concurrent calls to load, invalidate, and writeback. The lock 2052b281117SSeth Jennings * for the zswap_tree structure that contains the entry must 2062b281117SSeth Jennings * be held while changing the refcount. Since the lock must 2072b281117SSeth Jennings * be held, there is no reason to also make refcount atomic. 2082b281117SSeth Jennings * length - the length in bytes of the compressed page data. Needed during 209f999f38bSDomenico Cerasuolo * decompression. For a same value filled page length is 0, and both 210f999f38bSDomenico Cerasuolo * pool and lru are invalid and must be ignored. 211f1c54846SDan Streetman * pool - the zswap_pool the entry's data is in 212f1c54846SDan Streetman * handle - zpool allocation handle that stores the compressed page data 213a85f878bSSrividya Desireddy * value - value of the same-value filled pages which have same content 21497157d89SXiu Jianfeng * objcg - the obj_cgroup that the compressed memory is charged to 215f999f38bSDomenico Cerasuolo * lru - handle to the pool's lru used to evict pages. 2162b281117SSeth Jennings */ 2172b281117SSeth Jennings struct zswap_entry { 2182b281117SSeth Jennings struct rb_node rbnode; 2190bb48849SDomenico Cerasuolo swp_entry_t swpentry; 2202b281117SSeth Jennings int refcount; 2212b281117SSeth Jennings unsigned int length; 222f1c54846SDan Streetman struct zswap_pool *pool; 223a85f878bSSrividya Desireddy union { 2242b281117SSeth Jennings unsigned long handle; 225a85f878bSSrividya Desireddy unsigned long value; 226a85f878bSSrividya Desireddy }; 227f4840ccfSJohannes Weiner struct obj_cgroup *objcg; 228f999f38bSDomenico Cerasuolo struct list_head lru; 2292b281117SSeth Jennings }; 2302b281117SSeth Jennings 2312b281117SSeth Jennings /* 2322b281117SSeth Jennings * The tree lock in the zswap_tree struct protects a few things: 2332b281117SSeth Jennings * - the rbtree 2342b281117SSeth Jennings * - the refcount field of each entry in the tree 2352b281117SSeth Jennings */ 2362b281117SSeth Jennings struct zswap_tree { 2372b281117SSeth Jennings struct rb_root rbroot; 2382b281117SSeth Jennings spinlock_t lock; 2392b281117SSeth Jennings }; 2402b281117SSeth Jennings 2412b281117SSeth Jennings static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; 2422b281117SSeth Jennings 243f1c54846SDan Streetman /* RCU-protected iteration */ 244f1c54846SDan Streetman static LIST_HEAD(zswap_pools); 245f1c54846SDan Streetman /* protects zswap_pools list modification */ 246f1c54846SDan Streetman static DEFINE_SPINLOCK(zswap_pools_lock); 24732a4e169SDan Streetman /* pool counter to provide unique names to zpool */ 24832a4e169SDan Streetman static atomic_t zswap_pools_count = ATOMIC_INIT(0); 249f1c54846SDan Streetman 2509021ccecSLiu Shixin enum zswap_init_type { 2519021ccecSLiu Shixin ZSWAP_UNINIT, 2529021ccecSLiu Shixin ZSWAP_INIT_SUCCEED, 2539021ccecSLiu Shixin ZSWAP_INIT_FAILED 2549021ccecSLiu Shixin }; 25590b0fc26SDan Streetman 2569021ccecSLiu Shixin static enum zswap_init_type zswap_init_state; 257d7b028f5SDan Streetman 258141fdeecSLiu Shixin /* used to ensure the integrity of initialization */ 259141fdeecSLiu Shixin static DEFINE_MUTEX(zswap_init_lock); 260f1c54846SDan Streetman 261ae3d89a7SDan Streetman /* init completed, but couldn't create the initial pool */ 262ae3d89a7SDan Streetman static bool zswap_has_pool; 263ae3d89a7SDan Streetman 264f1c54846SDan Streetman /********************************* 265f1c54846SDan Streetman * helpers and fwd declarations 266f1c54846SDan Streetman **********************************/ 267f1c54846SDan Streetman 268f1c54846SDan Streetman #define zswap_pool_debug(msg, p) \ 269f1c54846SDan Streetman pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ 270b8cf32dcSYosry Ahmed zpool_get_type((p)->zpools[0])) 271f1c54846SDan Streetman 2720bb48849SDomenico Cerasuolo static int zswap_writeback_entry(struct zswap_entry *entry, 273ff9d5ba2SDomenico Cerasuolo struct zswap_tree *tree); 274f1c54846SDan Streetman static int zswap_pool_get(struct zswap_pool *pool); 275f1c54846SDan Streetman static void zswap_pool_put(struct zswap_pool *pool); 276f1c54846SDan Streetman 277f1c54846SDan Streetman static bool zswap_is_full(void) 278f1c54846SDan Streetman { 279ca79b0c2SArun KS return totalram_pages() * zswap_max_pool_percent / 100 < 280f1c54846SDan Streetman DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); 281f1c54846SDan Streetman } 282f1c54846SDan Streetman 28345190f01SVitaly Wool static bool zswap_can_accept(void) 28445190f01SVitaly Wool { 28545190f01SVitaly Wool return totalram_pages() * zswap_accept_thr_percent / 100 * 28645190f01SVitaly Wool zswap_max_pool_percent / 100 > 28745190f01SVitaly Wool DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); 28845190f01SVitaly Wool } 28945190f01SVitaly Wool 290b5ba474fSNhat Pham static u64 get_zswap_pool_size(struct zswap_pool *pool) 291b5ba474fSNhat Pham { 292b5ba474fSNhat Pham u64 pool_size = 0; 293b5ba474fSNhat Pham int i; 294b5ba474fSNhat Pham 295b5ba474fSNhat Pham for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) 296b5ba474fSNhat Pham pool_size += zpool_get_total_size(pool->zpools[i]); 297b5ba474fSNhat Pham 298b5ba474fSNhat Pham return pool_size; 299b5ba474fSNhat Pham } 300b5ba474fSNhat Pham 301f1c54846SDan Streetman static void zswap_update_total_size(void) 302f1c54846SDan Streetman { 303f1c54846SDan Streetman struct zswap_pool *pool; 304f1c54846SDan Streetman u64 total = 0; 305f1c54846SDan Streetman 306f1c54846SDan Streetman rcu_read_lock(); 307f1c54846SDan Streetman 308f1c54846SDan Streetman list_for_each_entry_rcu(pool, &zswap_pools, list) 309b5ba474fSNhat Pham total += get_zswap_pool_size(pool); 310f1c54846SDan Streetman 311f1c54846SDan Streetman rcu_read_unlock(); 312f1c54846SDan Streetman 313f1c54846SDan Streetman zswap_pool_total_size = total; 314f1c54846SDan Streetman } 315f1c54846SDan Streetman 316a65b0e76SDomenico Cerasuolo /* should be called under RCU */ 317a65b0e76SDomenico Cerasuolo #ifdef CONFIG_MEMCG 318a65b0e76SDomenico Cerasuolo static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) 319a65b0e76SDomenico Cerasuolo { 320a65b0e76SDomenico Cerasuolo return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL; 321a65b0e76SDomenico Cerasuolo } 322a65b0e76SDomenico Cerasuolo #else 323a65b0e76SDomenico Cerasuolo static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) 324a65b0e76SDomenico Cerasuolo { 325a65b0e76SDomenico Cerasuolo return NULL; 326a65b0e76SDomenico Cerasuolo } 327a65b0e76SDomenico Cerasuolo #endif 328a65b0e76SDomenico Cerasuolo 329a65b0e76SDomenico Cerasuolo static inline int entry_to_nid(struct zswap_entry *entry) 330a65b0e76SDomenico Cerasuolo { 331a65b0e76SDomenico Cerasuolo return page_to_nid(virt_to_page(entry)); 332a65b0e76SDomenico Cerasuolo } 333a65b0e76SDomenico Cerasuolo 334a65b0e76SDomenico Cerasuolo void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) 335a65b0e76SDomenico Cerasuolo { 336a65b0e76SDomenico Cerasuolo struct zswap_pool *pool; 337a65b0e76SDomenico Cerasuolo 338a65b0e76SDomenico Cerasuolo /* lock out zswap pools list modification */ 339a65b0e76SDomenico Cerasuolo spin_lock(&zswap_pools_lock); 340a65b0e76SDomenico Cerasuolo list_for_each_entry(pool, &zswap_pools, list) { 341a65b0e76SDomenico Cerasuolo if (pool->next_shrink == memcg) 342a65b0e76SDomenico Cerasuolo pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL); 343a65b0e76SDomenico Cerasuolo } 344a65b0e76SDomenico Cerasuolo spin_unlock(&zswap_pools_lock); 345a65b0e76SDomenico Cerasuolo } 346a65b0e76SDomenico Cerasuolo 3472b281117SSeth Jennings /********************************* 3482b281117SSeth Jennings * zswap entry functions 3492b281117SSeth Jennings **********************************/ 3502b281117SSeth Jennings static struct kmem_cache *zswap_entry_cache; 3512b281117SSeth Jennings 352a65b0e76SDomenico Cerasuolo static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid) 3532b281117SSeth Jennings { 3542b281117SSeth Jennings struct zswap_entry *entry; 355a65b0e76SDomenico Cerasuolo entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid); 3562b281117SSeth Jennings if (!entry) 3572b281117SSeth Jennings return NULL; 3582b281117SSeth Jennings entry->refcount = 1; 3590ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 3602b281117SSeth Jennings return entry; 3612b281117SSeth Jennings } 3622b281117SSeth Jennings 3632b281117SSeth Jennings static void zswap_entry_cache_free(struct zswap_entry *entry) 3642b281117SSeth Jennings { 3652b281117SSeth Jennings kmem_cache_free(zswap_entry_cache, entry); 3662b281117SSeth Jennings } 3672b281117SSeth Jennings 3682b281117SSeth Jennings /********************************* 369b5ba474fSNhat Pham * zswap lruvec functions 370b5ba474fSNhat Pham **********************************/ 371b5ba474fSNhat Pham void zswap_lruvec_state_init(struct lruvec *lruvec) 372b5ba474fSNhat Pham { 373b5ba474fSNhat Pham atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0); 374b5ba474fSNhat Pham } 375b5ba474fSNhat Pham 37696c7b0b4SMatthew Wilcox (Oracle) void zswap_folio_swapin(struct folio *folio) 377b5ba474fSNhat Pham { 378b5ba474fSNhat Pham struct lruvec *lruvec; 379b5ba474fSNhat Pham 38016e96ba5SNhat Pham VM_WARN_ON_ONCE(!folio_test_locked(folio)); 38196c7b0b4SMatthew Wilcox (Oracle) lruvec = folio_lruvec(folio); 382b5ba474fSNhat Pham atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); 383b5ba474fSNhat Pham } 384b5ba474fSNhat Pham 385b5ba474fSNhat Pham /********************************* 386a65b0e76SDomenico Cerasuolo * lru functions 387a65b0e76SDomenico Cerasuolo **********************************/ 388a65b0e76SDomenico Cerasuolo static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry) 389a65b0e76SDomenico Cerasuolo { 390b5ba474fSNhat Pham atomic_long_t *nr_zswap_protected; 391b5ba474fSNhat Pham unsigned long lru_size, old, new; 392a65b0e76SDomenico Cerasuolo int nid = entry_to_nid(entry); 393a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 394b5ba474fSNhat Pham struct lruvec *lruvec; 395a65b0e76SDomenico Cerasuolo 396a65b0e76SDomenico Cerasuolo /* 397a65b0e76SDomenico Cerasuolo * Note that it is safe to use rcu_read_lock() here, even in the face of 398a65b0e76SDomenico Cerasuolo * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection 399a65b0e76SDomenico Cerasuolo * used in list_lru lookup, only two scenarios are possible: 400a65b0e76SDomenico Cerasuolo * 401a65b0e76SDomenico Cerasuolo * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The 402a65b0e76SDomenico Cerasuolo * new entry will be reparented to memcg's parent's list_lru. 403a65b0e76SDomenico Cerasuolo * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The 404a65b0e76SDomenico Cerasuolo * new entry will be added directly to memcg's parent's list_lru. 405a65b0e76SDomenico Cerasuolo * 406a65b0e76SDomenico Cerasuolo * Similar reasoning holds for list_lru_del() and list_lru_putback(). 407a65b0e76SDomenico Cerasuolo */ 408a65b0e76SDomenico Cerasuolo rcu_read_lock(); 409a65b0e76SDomenico Cerasuolo memcg = mem_cgroup_from_entry(entry); 410a65b0e76SDomenico Cerasuolo /* will always succeed */ 411a65b0e76SDomenico Cerasuolo list_lru_add(list_lru, &entry->lru, nid, memcg); 412b5ba474fSNhat Pham 413b5ba474fSNhat Pham /* Update the protection area */ 414b5ba474fSNhat Pham lru_size = list_lru_count_one(list_lru, nid, memcg); 415b5ba474fSNhat Pham lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); 416b5ba474fSNhat Pham nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected; 417b5ba474fSNhat Pham old = atomic_long_inc_return(nr_zswap_protected); 418b5ba474fSNhat Pham /* 419b5ba474fSNhat Pham * Decay to avoid overflow and adapt to changing workloads. 420b5ba474fSNhat Pham * This is based on LRU reclaim cost decaying heuristics. 421b5ba474fSNhat Pham */ 422b5ba474fSNhat Pham do { 423b5ba474fSNhat Pham new = old > lru_size / 4 ? old / 2 : old; 424b5ba474fSNhat Pham } while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new)); 425a65b0e76SDomenico Cerasuolo rcu_read_unlock(); 426a65b0e76SDomenico Cerasuolo } 427a65b0e76SDomenico Cerasuolo 428a65b0e76SDomenico Cerasuolo static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry) 429a65b0e76SDomenico Cerasuolo { 430a65b0e76SDomenico Cerasuolo int nid = entry_to_nid(entry); 431a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 432a65b0e76SDomenico Cerasuolo 433a65b0e76SDomenico Cerasuolo rcu_read_lock(); 434a65b0e76SDomenico Cerasuolo memcg = mem_cgroup_from_entry(entry); 435a65b0e76SDomenico Cerasuolo /* will always succeed */ 436a65b0e76SDomenico Cerasuolo list_lru_del(list_lru, &entry->lru, nid, memcg); 437a65b0e76SDomenico Cerasuolo rcu_read_unlock(); 438a65b0e76SDomenico Cerasuolo } 439a65b0e76SDomenico Cerasuolo 440a65b0e76SDomenico Cerasuolo static void zswap_lru_putback(struct list_lru *list_lru, 441a65b0e76SDomenico Cerasuolo struct zswap_entry *entry) 442a65b0e76SDomenico Cerasuolo { 443a65b0e76SDomenico Cerasuolo int nid = entry_to_nid(entry); 444a65b0e76SDomenico Cerasuolo spinlock_t *lock = &list_lru->node[nid].lock; 445a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 446b5ba474fSNhat Pham struct lruvec *lruvec; 447a65b0e76SDomenico Cerasuolo 448a65b0e76SDomenico Cerasuolo rcu_read_lock(); 449a65b0e76SDomenico Cerasuolo memcg = mem_cgroup_from_entry(entry); 450a65b0e76SDomenico Cerasuolo spin_lock(lock); 451a65b0e76SDomenico Cerasuolo /* we cannot use list_lru_add here, because it increments node's lru count */ 452a65b0e76SDomenico Cerasuolo list_lru_putback(list_lru, &entry->lru, nid, memcg); 453a65b0e76SDomenico Cerasuolo spin_unlock(lock); 454b5ba474fSNhat Pham 455b5ba474fSNhat Pham lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(entry_to_nid(entry))); 456b5ba474fSNhat Pham /* increment the protection area to account for the LRU rotation. */ 457b5ba474fSNhat Pham atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); 458a65b0e76SDomenico Cerasuolo rcu_read_unlock(); 459a65b0e76SDomenico Cerasuolo } 460a65b0e76SDomenico Cerasuolo 461a65b0e76SDomenico Cerasuolo /********************************* 4622b281117SSeth Jennings * rbtree functions 4632b281117SSeth Jennings **********************************/ 4642b281117SSeth Jennings static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset) 4652b281117SSeth Jennings { 4662b281117SSeth Jennings struct rb_node *node = root->rb_node; 4672b281117SSeth Jennings struct zswap_entry *entry; 4680bb48849SDomenico Cerasuolo pgoff_t entry_offset; 4692b281117SSeth Jennings 4702b281117SSeth Jennings while (node) { 4712b281117SSeth Jennings entry = rb_entry(node, struct zswap_entry, rbnode); 4720bb48849SDomenico Cerasuolo entry_offset = swp_offset(entry->swpentry); 4730bb48849SDomenico Cerasuolo if (entry_offset > offset) 4742b281117SSeth Jennings node = node->rb_left; 4750bb48849SDomenico Cerasuolo else if (entry_offset < offset) 4762b281117SSeth Jennings node = node->rb_right; 4772b281117SSeth Jennings else 4782b281117SSeth Jennings return entry; 4792b281117SSeth Jennings } 4802b281117SSeth Jennings return NULL; 4812b281117SSeth Jennings } 4822b281117SSeth Jennings 4832b281117SSeth Jennings /* 4842b281117SSeth Jennings * In the case that a entry with the same offset is found, a pointer to 4852b281117SSeth Jennings * the existing entry is stored in dupentry and the function returns -EEXIST 4862b281117SSeth Jennings */ 4872b281117SSeth Jennings static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry, 4882b281117SSeth Jennings struct zswap_entry **dupentry) 4892b281117SSeth Jennings { 4902b281117SSeth Jennings struct rb_node **link = &root->rb_node, *parent = NULL; 4912b281117SSeth Jennings struct zswap_entry *myentry; 4920bb48849SDomenico Cerasuolo pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry); 4932b281117SSeth Jennings 4942b281117SSeth Jennings while (*link) { 4952b281117SSeth Jennings parent = *link; 4962b281117SSeth Jennings myentry = rb_entry(parent, struct zswap_entry, rbnode); 4970bb48849SDomenico Cerasuolo myentry_offset = swp_offset(myentry->swpentry); 4980bb48849SDomenico Cerasuolo if (myentry_offset > entry_offset) 4992b281117SSeth Jennings link = &(*link)->rb_left; 5000bb48849SDomenico Cerasuolo else if (myentry_offset < entry_offset) 5012b281117SSeth Jennings link = &(*link)->rb_right; 5022b281117SSeth Jennings else { 5032b281117SSeth Jennings *dupentry = myentry; 5042b281117SSeth Jennings return -EEXIST; 5052b281117SSeth Jennings } 5062b281117SSeth Jennings } 5072b281117SSeth Jennings rb_link_node(&entry->rbnode, parent, link); 5082b281117SSeth Jennings rb_insert_color(&entry->rbnode, root); 5092b281117SSeth Jennings return 0; 5102b281117SSeth Jennings } 5112b281117SSeth Jennings 51218a93707SYosry Ahmed static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) 5130ab0abcfSWeijie Yang { 5140ab0abcfSWeijie Yang if (!RB_EMPTY_NODE(&entry->rbnode)) { 5150ab0abcfSWeijie Yang rb_erase(&entry->rbnode, root); 5160ab0abcfSWeijie Yang RB_CLEAR_NODE(&entry->rbnode); 51718a93707SYosry Ahmed return true; 5180ab0abcfSWeijie Yang } 51918a93707SYosry Ahmed return false; 5200ab0abcfSWeijie Yang } 5210ab0abcfSWeijie Yang 522b8cf32dcSYosry Ahmed static struct zpool *zswap_find_zpool(struct zswap_entry *entry) 523b8cf32dcSYosry Ahmed { 524b8cf32dcSYosry Ahmed int i = 0; 525b8cf32dcSYosry Ahmed 526b8cf32dcSYosry Ahmed if (ZSWAP_NR_ZPOOLS > 1) 527b8cf32dcSYosry Ahmed i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS)); 528b8cf32dcSYosry Ahmed 529b8cf32dcSYosry Ahmed return entry->pool->zpools[i]; 530b8cf32dcSYosry Ahmed } 531b8cf32dcSYosry Ahmed 5320ab0abcfSWeijie Yang /* 53312d79d64SDan Streetman * Carries out the common pattern of freeing and entry's zpool allocation, 5340ab0abcfSWeijie Yang * freeing the entry itself, and decrementing the number of stored pages. 5350ab0abcfSWeijie Yang */ 53660105e12SMinchan Kim static void zswap_free_entry(struct zswap_entry *entry) 5370ab0abcfSWeijie Yang { 538a85f878bSSrividya Desireddy if (!entry->length) 539a85f878bSSrividya Desireddy atomic_dec(&zswap_same_filled_pages); 540a85f878bSSrividya Desireddy else { 541a65b0e76SDomenico Cerasuolo zswap_lru_del(&entry->pool->list_lru, entry); 542b8cf32dcSYosry Ahmed zpool_free(zswap_find_zpool(entry), entry->handle); 543b5ba474fSNhat Pham atomic_dec(&entry->pool->nr_stored); 544f1c54846SDan Streetman zswap_pool_put(entry->pool); 545a85f878bSSrividya Desireddy } 5462e601e1eSJohannes Weiner if (entry->objcg) { 5472e601e1eSJohannes Weiner obj_cgroup_uncharge_zswap(entry->objcg, entry->length); 5482e601e1eSJohannes Weiner obj_cgroup_put(entry->objcg); 5492e601e1eSJohannes Weiner } 5500ab0abcfSWeijie Yang zswap_entry_cache_free(entry); 5510ab0abcfSWeijie Yang atomic_dec(&zswap_stored_pages); 552f1c54846SDan Streetman zswap_update_total_size(); 5530ab0abcfSWeijie Yang } 5540ab0abcfSWeijie Yang 5550ab0abcfSWeijie Yang /* caller must hold the tree lock */ 5560ab0abcfSWeijie Yang static void zswap_entry_get(struct zswap_entry *entry) 5570ab0abcfSWeijie Yang { 5580ab0abcfSWeijie Yang entry->refcount++; 5590ab0abcfSWeijie Yang } 5600ab0abcfSWeijie Yang 5610ab0abcfSWeijie Yang /* caller must hold the tree lock 5620ab0abcfSWeijie Yang * remove from the tree and free it, if nobody reference the entry 5630ab0abcfSWeijie Yang */ 5640ab0abcfSWeijie Yang static void zswap_entry_put(struct zswap_tree *tree, 5650ab0abcfSWeijie Yang struct zswap_entry *entry) 5660ab0abcfSWeijie Yang { 5670ab0abcfSWeijie Yang int refcount = --entry->refcount; 5680ab0abcfSWeijie Yang 56973108957SJohannes Weiner WARN_ON_ONCE(refcount < 0); 5700ab0abcfSWeijie Yang if (refcount == 0) { 57173108957SJohannes Weiner WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode)); 57260105e12SMinchan Kim zswap_free_entry(entry); 5730ab0abcfSWeijie Yang } 5740ab0abcfSWeijie Yang } 5750ab0abcfSWeijie Yang 5760ab0abcfSWeijie Yang /* caller must hold the tree lock */ 5770ab0abcfSWeijie Yang static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, 5780ab0abcfSWeijie Yang pgoff_t offset) 5790ab0abcfSWeijie Yang { 580b0c9865fSAlexey Klimov struct zswap_entry *entry; 5810ab0abcfSWeijie Yang 5820ab0abcfSWeijie Yang entry = zswap_rb_search(root, offset); 5830ab0abcfSWeijie Yang if (entry) 5840ab0abcfSWeijie Yang zswap_entry_get(entry); 5850ab0abcfSWeijie Yang 5860ab0abcfSWeijie Yang return entry; 5870ab0abcfSWeijie Yang } 5880ab0abcfSWeijie Yang 5892b281117SSeth Jennings /********************************* 590b5ba474fSNhat Pham * shrinker functions 591b5ba474fSNhat Pham **********************************/ 592b5ba474fSNhat Pham static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l, 593b5ba474fSNhat Pham spinlock_t *lock, void *arg); 594b5ba474fSNhat Pham 595b5ba474fSNhat Pham static unsigned long zswap_shrinker_scan(struct shrinker *shrinker, 596b5ba474fSNhat Pham struct shrink_control *sc) 597b5ba474fSNhat Pham { 598b5ba474fSNhat Pham struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid)); 599b5ba474fSNhat Pham unsigned long shrink_ret, nr_protected, lru_size; 600b5ba474fSNhat Pham struct zswap_pool *pool = shrinker->private_data; 601b5ba474fSNhat Pham bool encountered_page_in_swapcache = false; 602b5ba474fSNhat Pham 603501a06feSNhat Pham if (!zswap_shrinker_enabled || 604501a06feSNhat Pham !mem_cgroup_zswap_writeback_enabled(sc->memcg)) { 605b5ba474fSNhat Pham sc->nr_scanned = 0; 606b5ba474fSNhat Pham return SHRINK_STOP; 607b5ba474fSNhat Pham } 608b5ba474fSNhat Pham 609b5ba474fSNhat Pham nr_protected = 610b5ba474fSNhat Pham atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected); 611b5ba474fSNhat Pham lru_size = list_lru_shrink_count(&pool->list_lru, sc); 612b5ba474fSNhat Pham 613b5ba474fSNhat Pham /* 614b5ba474fSNhat Pham * Abort if we are shrinking into the protected region. 615b5ba474fSNhat Pham * 616b5ba474fSNhat Pham * This short-circuiting is necessary because if we have too many multiple 617b5ba474fSNhat Pham * concurrent reclaimers getting the freeable zswap object counts at the 618b5ba474fSNhat Pham * same time (before any of them made reasonable progress), the total 619b5ba474fSNhat Pham * number of reclaimed objects might be more than the number of unprotected 620b5ba474fSNhat Pham * objects (i.e the reclaimers will reclaim into the protected area of the 621b5ba474fSNhat Pham * zswap LRU). 622b5ba474fSNhat Pham */ 623b5ba474fSNhat Pham if (nr_protected >= lru_size - sc->nr_to_scan) { 624b5ba474fSNhat Pham sc->nr_scanned = 0; 625b5ba474fSNhat Pham return SHRINK_STOP; 626b5ba474fSNhat Pham } 627b5ba474fSNhat Pham 628b5ba474fSNhat Pham shrink_ret = list_lru_shrink_walk(&pool->list_lru, sc, &shrink_memcg_cb, 629b5ba474fSNhat Pham &encountered_page_in_swapcache); 630b5ba474fSNhat Pham 631b5ba474fSNhat Pham if (encountered_page_in_swapcache) 632b5ba474fSNhat Pham return SHRINK_STOP; 633b5ba474fSNhat Pham 634b5ba474fSNhat Pham return shrink_ret ? shrink_ret : SHRINK_STOP; 635b5ba474fSNhat Pham } 636b5ba474fSNhat Pham 637b5ba474fSNhat Pham static unsigned long zswap_shrinker_count(struct shrinker *shrinker, 638b5ba474fSNhat Pham struct shrink_control *sc) 639b5ba474fSNhat Pham { 640b5ba474fSNhat Pham struct zswap_pool *pool = shrinker->private_data; 641b5ba474fSNhat Pham struct mem_cgroup *memcg = sc->memcg; 642b5ba474fSNhat Pham struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid)); 643b5ba474fSNhat Pham unsigned long nr_backing, nr_stored, nr_freeable, nr_protected; 644b5ba474fSNhat Pham 645501a06feSNhat Pham if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) 646b5ba474fSNhat Pham return 0; 647b5ba474fSNhat Pham 648b5ba474fSNhat Pham #ifdef CONFIG_MEMCG_KMEM 6497d7ef0a4SYosry Ahmed mem_cgroup_flush_stats(memcg); 650b5ba474fSNhat Pham nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; 651b5ba474fSNhat Pham nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); 652b5ba474fSNhat Pham #else 653b5ba474fSNhat Pham /* use pool stats instead of memcg stats */ 654b5ba474fSNhat Pham nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT; 655b5ba474fSNhat Pham nr_stored = atomic_read(&pool->nr_stored); 656b5ba474fSNhat Pham #endif 657b5ba474fSNhat Pham 658b5ba474fSNhat Pham if (!nr_stored) 659b5ba474fSNhat Pham return 0; 660b5ba474fSNhat Pham 661b5ba474fSNhat Pham nr_protected = 662b5ba474fSNhat Pham atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected); 663b5ba474fSNhat Pham nr_freeable = list_lru_shrink_count(&pool->list_lru, sc); 664b5ba474fSNhat Pham /* 665b5ba474fSNhat Pham * Subtract the lru size by an estimate of the number of pages 666b5ba474fSNhat Pham * that should be protected. 667b5ba474fSNhat Pham */ 668b5ba474fSNhat Pham nr_freeable = nr_freeable > nr_protected ? nr_freeable - nr_protected : 0; 669b5ba474fSNhat Pham 670b5ba474fSNhat Pham /* 671b5ba474fSNhat Pham * Scale the number of freeable pages by the memory saving factor. 672b5ba474fSNhat Pham * This ensures that the better zswap compresses memory, the fewer 673b5ba474fSNhat Pham * pages we will evict to swap (as it will otherwise incur IO for 674b5ba474fSNhat Pham * relatively small memory saving). 675b5ba474fSNhat Pham */ 676b5ba474fSNhat Pham return mult_frac(nr_freeable, nr_backing, nr_stored); 677b5ba474fSNhat Pham } 678b5ba474fSNhat Pham 679b5ba474fSNhat Pham static void zswap_alloc_shrinker(struct zswap_pool *pool) 680b5ba474fSNhat Pham { 681b5ba474fSNhat Pham pool->shrinker = 682b5ba474fSNhat Pham shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap"); 683b5ba474fSNhat Pham if (!pool->shrinker) 684b5ba474fSNhat Pham return; 685b5ba474fSNhat Pham 686b5ba474fSNhat Pham pool->shrinker->private_data = pool; 687b5ba474fSNhat Pham pool->shrinker->scan_objects = zswap_shrinker_scan; 688b5ba474fSNhat Pham pool->shrinker->count_objects = zswap_shrinker_count; 689b5ba474fSNhat Pham pool->shrinker->batch = 0; 690b5ba474fSNhat Pham pool->shrinker->seeks = DEFAULT_SEEKS; 691b5ba474fSNhat Pham } 692b5ba474fSNhat Pham 693b5ba474fSNhat Pham /********************************* 6942b281117SSeth Jennings * per-cpu code 6952b281117SSeth Jennings **********************************/ 696cab7a7e5SSebastian Andrzej Siewior static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) 697f1c54846SDan Streetman { 698cab7a7e5SSebastian Andrzej Siewior struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); 6991ec3b5feSBarry Song struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); 7001ec3b5feSBarry Song struct crypto_acomp *acomp; 7011ec3b5feSBarry Song struct acomp_req *req; 7028ba2f844SChengming Zhou int ret; 7038ba2f844SChengming Zhou 7048ba2f844SChengming Zhou mutex_init(&acomp_ctx->mutex); 7058ba2f844SChengming Zhou 7068ba2f844SChengming Zhou acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); 7078ba2f844SChengming Zhou if (!acomp_ctx->buffer) 7088ba2f844SChengming Zhou return -ENOMEM; 709f1c54846SDan Streetman 7101ec3b5feSBarry Song acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); 7111ec3b5feSBarry Song if (IS_ERR(acomp)) { 7121ec3b5feSBarry Song pr_err("could not alloc crypto acomp %s : %ld\n", 7131ec3b5feSBarry Song pool->tfm_name, PTR_ERR(acomp)); 7148ba2f844SChengming Zhou ret = PTR_ERR(acomp); 7158ba2f844SChengming Zhou goto acomp_fail; 7161ec3b5feSBarry Song } 7171ec3b5feSBarry Song acomp_ctx->acomp = acomp; 718cab7a7e5SSebastian Andrzej Siewior 7191ec3b5feSBarry Song req = acomp_request_alloc(acomp_ctx->acomp); 7201ec3b5feSBarry Song if (!req) { 7211ec3b5feSBarry Song pr_err("could not alloc crypto acomp_request %s\n", 7221ec3b5feSBarry Song pool->tfm_name); 7238ba2f844SChengming Zhou ret = -ENOMEM; 7248ba2f844SChengming Zhou goto req_fail; 725f1c54846SDan Streetman } 7261ec3b5feSBarry Song acomp_ctx->req = req; 7271ec3b5feSBarry Song 7281ec3b5feSBarry Song crypto_init_wait(&acomp_ctx->wait); 7291ec3b5feSBarry Song /* 7301ec3b5feSBarry Song * if the backend of acomp is async zip, crypto_req_done() will wakeup 7311ec3b5feSBarry Song * crypto_wait_req(); if the backend of acomp is scomp, the callback 7321ec3b5feSBarry Song * won't be called, crypto_wait_req() will return without blocking. 7331ec3b5feSBarry Song */ 7341ec3b5feSBarry Song acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 7351ec3b5feSBarry Song crypto_req_done, &acomp_ctx->wait); 7361ec3b5feSBarry Song 737cab7a7e5SSebastian Andrzej Siewior return 0; 7388ba2f844SChengming Zhou 7398ba2f844SChengming Zhou req_fail: 7408ba2f844SChengming Zhou crypto_free_acomp(acomp_ctx->acomp); 7418ba2f844SChengming Zhou acomp_fail: 7428ba2f844SChengming Zhou kfree(acomp_ctx->buffer); 7438ba2f844SChengming Zhou return ret; 744cab7a7e5SSebastian Andrzej Siewior } 745cab7a7e5SSebastian Andrzej Siewior 746cab7a7e5SSebastian Andrzej Siewior static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node) 747cab7a7e5SSebastian Andrzej Siewior { 748cab7a7e5SSebastian Andrzej Siewior struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); 7491ec3b5feSBarry Song struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); 750cab7a7e5SSebastian Andrzej Siewior 7511ec3b5feSBarry Song if (!IS_ERR_OR_NULL(acomp_ctx)) { 7521ec3b5feSBarry Song if (!IS_ERR_OR_NULL(acomp_ctx->req)) 7531ec3b5feSBarry Song acomp_request_free(acomp_ctx->req); 7541ec3b5feSBarry Song if (!IS_ERR_OR_NULL(acomp_ctx->acomp)) 7551ec3b5feSBarry Song crypto_free_acomp(acomp_ctx->acomp); 7568ba2f844SChengming Zhou kfree(acomp_ctx->buffer); 7571ec3b5feSBarry Song } 7581ec3b5feSBarry Song 759f1c54846SDan Streetman return 0; 760f1c54846SDan Streetman } 761f1c54846SDan Streetman 762f1c54846SDan Streetman /********************************* 763f1c54846SDan Streetman * pool functions 764f1c54846SDan Streetman **********************************/ 765f1c54846SDan Streetman 766f1c54846SDan Streetman static struct zswap_pool *__zswap_pool_current(void) 767f1c54846SDan Streetman { 768f1c54846SDan Streetman struct zswap_pool *pool; 769f1c54846SDan Streetman 770f1c54846SDan Streetman pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list); 771ae3d89a7SDan Streetman WARN_ONCE(!pool && zswap_has_pool, 772ae3d89a7SDan Streetman "%s: no page storage pool!\n", __func__); 773f1c54846SDan Streetman 774f1c54846SDan Streetman return pool; 775f1c54846SDan Streetman } 776f1c54846SDan Streetman 777f1c54846SDan Streetman static struct zswap_pool *zswap_pool_current(void) 778f1c54846SDan Streetman { 779f1c54846SDan Streetman assert_spin_locked(&zswap_pools_lock); 780f1c54846SDan Streetman 781f1c54846SDan Streetman return __zswap_pool_current(); 782f1c54846SDan Streetman } 783f1c54846SDan Streetman 784f1c54846SDan Streetman static struct zswap_pool *zswap_pool_current_get(void) 785f1c54846SDan Streetman { 786f1c54846SDan Streetman struct zswap_pool *pool; 787f1c54846SDan Streetman 788f1c54846SDan Streetman rcu_read_lock(); 789f1c54846SDan Streetman 790f1c54846SDan Streetman pool = __zswap_pool_current(); 791ae3d89a7SDan Streetman if (!zswap_pool_get(pool)) 792f1c54846SDan Streetman pool = NULL; 793f1c54846SDan Streetman 794f1c54846SDan Streetman rcu_read_unlock(); 795f1c54846SDan Streetman 796f1c54846SDan Streetman return pool; 797f1c54846SDan Streetman } 798f1c54846SDan Streetman 799f1c54846SDan Streetman static struct zswap_pool *zswap_pool_last_get(void) 800f1c54846SDan Streetman { 801f1c54846SDan Streetman struct zswap_pool *pool, *last = NULL; 802f1c54846SDan Streetman 803f1c54846SDan Streetman rcu_read_lock(); 804f1c54846SDan Streetman 805f1c54846SDan Streetman list_for_each_entry_rcu(pool, &zswap_pools, list) 806f1c54846SDan Streetman last = pool; 807ae3d89a7SDan Streetman WARN_ONCE(!last && zswap_has_pool, 808ae3d89a7SDan Streetman "%s: no page storage pool!\n", __func__); 809ae3d89a7SDan Streetman if (!zswap_pool_get(last)) 810f1c54846SDan Streetman last = NULL; 811f1c54846SDan Streetman 812f1c54846SDan Streetman rcu_read_unlock(); 813f1c54846SDan Streetman 814f1c54846SDan Streetman return last; 815f1c54846SDan Streetman } 816f1c54846SDan Streetman 8178bc8b228SDan Streetman /* type and compressor must be null-terminated */ 818f1c54846SDan Streetman static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) 819f1c54846SDan Streetman { 820f1c54846SDan Streetman struct zswap_pool *pool; 821f1c54846SDan Streetman 822f1c54846SDan Streetman assert_spin_locked(&zswap_pools_lock); 823f1c54846SDan Streetman 824f1c54846SDan Streetman list_for_each_entry_rcu(pool, &zswap_pools, list) { 8258bc8b228SDan Streetman if (strcmp(pool->tfm_name, compressor)) 826f1c54846SDan Streetman continue; 827b8cf32dcSYosry Ahmed /* all zpools share the same type */ 828b8cf32dcSYosry Ahmed if (strcmp(zpool_get_type(pool->zpools[0]), type)) 829f1c54846SDan Streetman continue; 830f1c54846SDan Streetman /* if we can't get it, it's about to be destroyed */ 831f1c54846SDan Streetman if (!zswap_pool_get(pool)) 832f1c54846SDan Streetman continue; 833f1c54846SDan Streetman return pool; 834f1c54846SDan Streetman } 835f1c54846SDan Streetman 836f1c54846SDan Streetman return NULL; 837f1c54846SDan Streetman } 838f1c54846SDan Streetman 83918a93707SYosry Ahmed /* 84018a93707SYosry Ahmed * If the entry is still valid in the tree, drop the initial ref and remove it 84118a93707SYosry Ahmed * from the tree. This function must be called with an additional ref held, 84218a93707SYosry Ahmed * otherwise it may race with another invalidation freeing the entry. 84318a93707SYosry Ahmed */ 844418fd29dSDomenico Cerasuolo static void zswap_invalidate_entry(struct zswap_tree *tree, 845418fd29dSDomenico Cerasuolo struct zswap_entry *entry) 846418fd29dSDomenico Cerasuolo { 84718a93707SYosry Ahmed if (zswap_rb_erase(&tree->rbroot, entry)) 848418fd29dSDomenico Cerasuolo zswap_entry_put(tree, entry); 849418fd29dSDomenico Cerasuolo } 850418fd29dSDomenico Cerasuolo 851a65b0e76SDomenico Cerasuolo static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l, 852a65b0e76SDomenico Cerasuolo spinlock_t *lock, void *arg) 853f999f38bSDomenico Cerasuolo { 854a65b0e76SDomenico Cerasuolo struct zswap_entry *entry = container_of(item, struct zswap_entry, lru); 855b5ba474fSNhat Pham bool *encountered_page_in_swapcache = (bool *)arg; 856f999f38bSDomenico Cerasuolo struct zswap_tree *tree; 857f999f38bSDomenico Cerasuolo pgoff_t swpoffset; 858a65b0e76SDomenico Cerasuolo enum lru_status ret = LRU_REMOVED_RETRY; 859a65b0e76SDomenico Cerasuolo int writeback_result; 860f999f38bSDomenico Cerasuolo 861f999f38bSDomenico Cerasuolo /* 862f999f38bSDomenico Cerasuolo * Once the lru lock is dropped, the entry might get freed. The 863f999f38bSDomenico Cerasuolo * swpoffset is copied to the stack, and entry isn't deref'd again 864f999f38bSDomenico Cerasuolo * until the entry is verified to still be alive in the tree. 865f999f38bSDomenico Cerasuolo */ 8660bb48849SDomenico Cerasuolo swpoffset = swp_offset(entry->swpentry); 8670bb48849SDomenico Cerasuolo tree = zswap_trees[swp_type(entry->swpentry)]; 868a65b0e76SDomenico Cerasuolo list_lru_isolate(l, item); 869a65b0e76SDomenico Cerasuolo /* 870a65b0e76SDomenico Cerasuolo * It's safe to drop the lock here because we return either 871a65b0e76SDomenico Cerasuolo * LRU_REMOVED_RETRY or LRU_RETRY. 872a65b0e76SDomenico Cerasuolo */ 873a65b0e76SDomenico Cerasuolo spin_unlock(lock); 874f999f38bSDomenico Cerasuolo 875f999f38bSDomenico Cerasuolo /* Check for invalidate() race */ 876f999f38bSDomenico Cerasuolo spin_lock(&tree->lock); 877a65b0e76SDomenico Cerasuolo if (entry != zswap_rb_search(&tree->rbroot, swpoffset)) 878f999f38bSDomenico Cerasuolo goto unlock; 879a65b0e76SDomenico Cerasuolo 880f999f38bSDomenico Cerasuolo /* Hold a reference to prevent a free during writeback */ 881f999f38bSDomenico Cerasuolo zswap_entry_get(entry); 882f999f38bSDomenico Cerasuolo spin_unlock(&tree->lock); 883f999f38bSDomenico Cerasuolo 884a65b0e76SDomenico Cerasuolo writeback_result = zswap_writeback_entry(entry, tree); 885f999f38bSDomenico Cerasuolo 886f999f38bSDomenico Cerasuolo spin_lock(&tree->lock); 887a65b0e76SDomenico Cerasuolo if (writeback_result) { 888a65b0e76SDomenico Cerasuolo zswap_reject_reclaim_fail++; 889a65b0e76SDomenico Cerasuolo zswap_lru_putback(&entry->pool->list_lru, entry); 890a65b0e76SDomenico Cerasuolo ret = LRU_RETRY; 891b5ba474fSNhat Pham 892b5ba474fSNhat Pham /* 893b5ba474fSNhat Pham * Encountering a page already in swap cache is a sign that we are shrinking 894b5ba474fSNhat Pham * into the warmer region. We should terminate shrinking (if we're in the dynamic 895b5ba474fSNhat Pham * shrinker context). 896b5ba474fSNhat Pham */ 89727d3969bSChengming Zhou if (writeback_result == -EEXIST && encountered_page_in_swapcache) 898b5ba474fSNhat Pham *encountered_page_in_swapcache = true; 899b5ba474fSNhat Pham 900ff9d5ba2SDomenico Cerasuolo goto put_unlock; 901f999f38bSDomenico Cerasuolo } 902a65b0e76SDomenico Cerasuolo zswap_written_back_pages++; 903f999f38bSDomenico Cerasuolo 9047108cc3fSDomenico Cerasuolo if (entry->objcg) 9057108cc3fSDomenico Cerasuolo count_objcg_event(entry->objcg, ZSWPWB); 9067108cc3fSDomenico Cerasuolo 9077108cc3fSDomenico Cerasuolo count_vm_event(ZSWPWB); 908418fd29dSDomenico Cerasuolo /* 909418fd29dSDomenico Cerasuolo * Writeback started successfully, the page now belongs to the 910418fd29dSDomenico Cerasuolo * swapcache. Drop the entry from zswap - unless invalidate already 911418fd29dSDomenico Cerasuolo * took it out while we had the tree->lock released for IO. 912418fd29dSDomenico Cerasuolo */ 913418fd29dSDomenico Cerasuolo zswap_invalidate_entry(tree, entry); 914ff9d5ba2SDomenico Cerasuolo 915ff9d5ba2SDomenico Cerasuolo put_unlock: 916f999f38bSDomenico Cerasuolo /* Drop local reference */ 917f999f38bSDomenico Cerasuolo zswap_entry_put(tree, entry); 918f999f38bSDomenico Cerasuolo unlock: 919f999f38bSDomenico Cerasuolo spin_unlock(&tree->lock); 920a65b0e76SDomenico Cerasuolo spin_lock(lock); 921a65b0e76SDomenico Cerasuolo return ret; 922a65b0e76SDomenico Cerasuolo } 923a65b0e76SDomenico Cerasuolo 924a65b0e76SDomenico Cerasuolo static int shrink_memcg(struct mem_cgroup *memcg) 925a65b0e76SDomenico Cerasuolo { 926a65b0e76SDomenico Cerasuolo struct zswap_pool *pool; 927a65b0e76SDomenico Cerasuolo int nid, shrunk = 0; 928a65b0e76SDomenico Cerasuolo 929501a06feSNhat Pham if (!mem_cgroup_zswap_writeback_enabled(memcg)) 930501a06feSNhat Pham return -EINVAL; 931501a06feSNhat Pham 932a65b0e76SDomenico Cerasuolo /* 933a65b0e76SDomenico Cerasuolo * Skip zombies because their LRUs are reparented and we would be 934a65b0e76SDomenico Cerasuolo * reclaiming from the parent instead of the dead memcg. 935a65b0e76SDomenico Cerasuolo */ 936a65b0e76SDomenico Cerasuolo if (memcg && !mem_cgroup_online(memcg)) 937a65b0e76SDomenico Cerasuolo return -ENOENT; 938a65b0e76SDomenico Cerasuolo 939a65b0e76SDomenico Cerasuolo pool = zswap_pool_current_get(); 940a65b0e76SDomenico Cerasuolo if (!pool) 941a65b0e76SDomenico Cerasuolo return -EINVAL; 942a65b0e76SDomenico Cerasuolo 943a65b0e76SDomenico Cerasuolo for_each_node_state(nid, N_NORMAL_MEMORY) { 944a65b0e76SDomenico Cerasuolo unsigned long nr_to_walk = 1; 945a65b0e76SDomenico Cerasuolo 946a65b0e76SDomenico Cerasuolo shrunk += list_lru_walk_one(&pool->list_lru, nid, memcg, 947a65b0e76SDomenico Cerasuolo &shrink_memcg_cb, NULL, &nr_to_walk); 948a65b0e76SDomenico Cerasuolo } 949a65b0e76SDomenico Cerasuolo zswap_pool_put(pool); 950a65b0e76SDomenico Cerasuolo return shrunk ? 0 : -EAGAIN; 951f999f38bSDomenico Cerasuolo } 952f999f38bSDomenico Cerasuolo 95345190f01SVitaly Wool static void shrink_worker(struct work_struct *w) 95445190f01SVitaly Wool { 95545190f01SVitaly Wool struct zswap_pool *pool = container_of(w, typeof(*pool), 95645190f01SVitaly Wool shrink_work); 957a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg; 958e0228d59SDomenico Cerasuolo int ret, failures = 0; 95945190f01SVitaly Wool 960a65b0e76SDomenico Cerasuolo /* global reclaim will select cgroup in a round-robin fashion. */ 961e0228d59SDomenico Cerasuolo do { 962a65b0e76SDomenico Cerasuolo spin_lock(&zswap_pools_lock); 963a65b0e76SDomenico Cerasuolo pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL); 964a65b0e76SDomenico Cerasuolo memcg = pool->next_shrink; 965a65b0e76SDomenico Cerasuolo 966a65b0e76SDomenico Cerasuolo /* 967a65b0e76SDomenico Cerasuolo * We need to retry if we have gone through a full round trip, or if we 968a65b0e76SDomenico Cerasuolo * got an offline memcg (or else we risk undoing the effect of the 969a65b0e76SDomenico Cerasuolo * zswap memcg offlining cleanup callback). This is not catastrophic 970a65b0e76SDomenico Cerasuolo * per se, but it will keep the now offlined memcg hostage for a while. 971a65b0e76SDomenico Cerasuolo * 972a65b0e76SDomenico Cerasuolo * Note that if we got an online memcg, we will keep the extra 973a65b0e76SDomenico Cerasuolo * reference in case the original reference obtained by mem_cgroup_iter 974a65b0e76SDomenico Cerasuolo * is dropped by the zswap memcg offlining callback, ensuring that the 975a65b0e76SDomenico Cerasuolo * memcg is not killed when we are reclaiming. 976a65b0e76SDomenico Cerasuolo */ 977a65b0e76SDomenico Cerasuolo if (!memcg) { 978a65b0e76SDomenico Cerasuolo spin_unlock(&zswap_pools_lock); 979e0228d59SDomenico Cerasuolo if (++failures == MAX_RECLAIM_RETRIES) 980e0228d59SDomenico Cerasuolo break; 981a65b0e76SDomenico Cerasuolo 982a65b0e76SDomenico Cerasuolo goto resched; 983e0228d59SDomenico Cerasuolo } 984a65b0e76SDomenico Cerasuolo 985a65b0e76SDomenico Cerasuolo if (!mem_cgroup_tryget_online(memcg)) { 986a65b0e76SDomenico Cerasuolo /* drop the reference from mem_cgroup_iter() */ 987a65b0e76SDomenico Cerasuolo mem_cgroup_iter_break(NULL, memcg); 988a65b0e76SDomenico Cerasuolo pool->next_shrink = NULL; 989a65b0e76SDomenico Cerasuolo spin_unlock(&zswap_pools_lock); 990a65b0e76SDomenico Cerasuolo 991a65b0e76SDomenico Cerasuolo if (++failures == MAX_RECLAIM_RETRIES) 992a65b0e76SDomenico Cerasuolo break; 993a65b0e76SDomenico Cerasuolo 994a65b0e76SDomenico Cerasuolo goto resched; 995a65b0e76SDomenico Cerasuolo } 996a65b0e76SDomenico Cerasuolo spin_unlock(&zswap_pools_lock); 997a65b0e76SDomenico Cerasuolo 998a65b0e76SDomenico Cerasuolo ret = shrink_memcg(memcg); 999a65b0e76SDomenico Cerasuolo /* drop the extra reference */ 1000a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1001a65b0e76SDomenico Cerasuolo 1002a65b0e76SDomenico Cerasuolo if (ret == -EINVAL) 1003a65b0e76SDomenico Cerasuolo break; 1004a65b0e76SDomenico Cerasuolo if (ret && ++failures == MAX_RECLAIM_RETRIES) 1005a65b0e76SDomenico Cerasuolo break; 1006a65b0e76SDomenico Cerasuolo 1007a65b0e76SDomenico Cerasuolo resched: 1008e0228d59SDomenico Cerasuolo cond_resched(); 1009e0228d59SDomenico Cerasuolo } while (!zswap_can_accept()); 101045190f01SVitaly Wool zswap_pool_put(pool); 101145190f01SVitaly Wool } 101245190f01SVitaly Wool 1013f1c54846SDan Streetman static struct zswap_pool *zswap_pool_create(char *type, char *compressor) 1014f1c54846SDan Streetman { 1015b8cf32dcSYosry Ahmed int i; 1016f1c54846SDan Streetman struct zswap_pool *pool; 101732a4e169SDan Streetman char name[38]; /* 'zswap' + 32 char (max) num + \0 */ 1018d0164adcSMel Gorman gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; 1019cab7a7e5SSebastian Andrzej Siewior int ret; 1020f1c54846SDan Streetman 1021bae21db8SDan Streetman if (!zswap_has_pool) { 1022bae21db8SDan Streetman /* if either are unset, pool initialization failed, and we 1023bae21db8SDan Streetman * need both params to be set correctly before trying to 1024bae21db8SDan Streetman * create a pool. 1025bae21db8SDan Streetman */ 1026bae21db8SDan Streetman if (!strcmp(type, ZSWAP_PARAM_UNSET)) 1027bae21db8SDan Streetman return NULL; 1028bae21db8SDan Streetman if (!strcmp(compressor, ZSWAP_PARAM_UNSET)) 1029bae21db8SDan Streetman return NULL; 1030bae21db8SDan Streetman } 1031bae21db8SDan Streetman 1032f1c54846SDan Streetman pool = kzalloc(sizeof(*pool), GFP_KERNEL); 1033f4ae0ce0SMarkus Elfring if (!pool) 1034f1c54846SDan Streetman return NULL; 1035f1c54846SDan Streetman 1036b8cf32dcSYosry Ahmed for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) { 103732a4e169SDan Streetman /* unique name for each pool specifically required by zsmalloc */ 1038b8cf32dcSYosry Ahmed snprintf(name, 38, "zswap%x", 1039b8cf32dcSYosry Ahmed atomic_inc_return(&zswap_pools_count)); 104032a4e169SDan Streetman 1041b8cf32dcSYosry Ahmed pool->zpools[i] = zpool_create_pool(type, name, gfp); 1042b8cf32dcSYosry Ahmed if (!pool->zpools[i]) { 1043f1c54846SDan Streetman pr_err("%s zpool not available\n", type); 1044f1c54846SDan Streetman goto error; 1045f1c54846SDan Streetman } 1046b8cf32dcSYosry Ahmed } 1047b8cf32dcSYosry Ahmed pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0])); 1048f1c54846SDan Streetman 104979cd4202SZhiyuan Dai strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); 10501ec3b5feSBarry Song 10511ec3b5feSBarry Song pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx); 10521ec3b5feSBarry Song if (!pool->acomp_ctx) { 1053f1c54846SDan Streetman pr_err("percpu alloc failed\n"); 1054f1c54846SDan Streetman goto error; 1055f1c54846SDan Streetman } 1056f1c54846SDan Streetman 1057cab7a7e5SSebastian Andrzej Siewior ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE, 1058cab7a7e5SSebastian Andrzej Siewior &pool->node); 1059cab7a7e5SSebastian Andrzej Siewior if (ret) 1060f1c54846SDan Streetman goto error; 1061b5ba474fSNhat Pham 1062b5ba474fSNhat Pham zswap_alloc_shrinker(pool); 1063b5ba474fSNhat Pham if (!pool->shrinker) 1064b5ba474fSNhat Pham goto error; 1065b5ba474fSNhat Pham 1066f1c54846SDan Streetman pr_debug("using %s compressor\n", pool->tfm_name); 1067f1c54846SDan Streetman 1068f1c54846SDan Streetman /* being the current pool takes 1 ref; this func expects the 1069f1c54846SDan Streetman * caller to always add the new pool as the current pool 1070f1c54846SDan Streetman */ 1071f1c54846SDan Streetman kref_init(&pool->kref); 1072f1c54846SDan Streetman INIT_LIST_HEAD(&pool->list); 1073b5ba474fSNhat Pham if (list_lru_init_memcg(&pool->list_lru, pool->shrinker)) 1074b5ba474fSNhat Pham goto lru_fail; 1075b5ba474fSNhat Pham shrinker_register(pool->shrinker); 107645190f01SVitaly Wool INIT_WORK(&pool->shrink_work, shrink_worker); 1077b5ba474fSNhat Pham atomic_set(&pool->nr_stored, 0); 1078f1c54846SDan Streetman 1079f1c54846SDan Streetman zswap_pool_debug("created", pool); 1080f1c54846SDan Streetman 1081f1c54846SDan Streetman return pool; 1082f1c54846SDan Streetman 1083b5ba474fSNhat Pham lru_fail: 1084b5ba474fSNhat Pham list_lru_destroy(&pool->list_lru); 1085b5ba474fSNhat Pham shrinker_free(pool->shrinker); 1086f1c54846SDan Streetman error: 10871ec3b5feSBarry Song if (pool->acomp_ctx) 10881ec3b5feSBarry Song free_percpu(pool->acomp_ctx); 1089b8cf32dcSYosry Ahmed while (i--) 1090b8cf32dcSYosry Ahmed zpool_destroy_pool(pool->zpools[i]); 1091f1c54846SDan Streetman kfree(pool); 1092f1c54846SDan Streetman return NULL; 1093f1c54846SDan Streetman } 1094f1c54846SDan Streetman 1095141fdeecSLiu Shixin static struct zswap_pool *__zswap_pool_create_fallback(void) 1096f1c54846SDan Streetman { 1097bae21db8SDan Streetman bool has_comp, has_zpool; 1098bae21db8SDan Streetman 10991ec3b5feSBarry Song has_comp = crypto_has_acomp(zswap_compressor, 0, 0); 1100bb8b93b5SMaciej S. Szmigiero if (!has_comp && strcmp(zswap_compressor, 1101bb8b93b5SMaciej S. Szmigiero CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) { 1102f1c54846SDan Streetman pr_err("compressor %s not available, using default %s\n", 1103bb8b93b5SMaciej S. Szmigiero zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT); 1104c99b42c3SDan Streetman param_free_charp(&zswap_compressor); 1105bb8b93b5SMaciej S. Szmigiero zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; 11061ec3b5feSBarry Song has_comp = crypto_has_acomp(zswap_compressor, 0, 0); 1107f1c54846SDan Streetman } 1108bae21db8SDan Streetman if (!has_comp) { 1109bae21db8SDan Streetman pr_err("default compressor %s not available\n", 1110bae21db8SDan Streetman zswap_compressor); 1111bae21db8SDan Streetman param_free_charp(&zswap_compressor); 1112bae21db8SDan Streetman zswap_compressor = ZSWAP_PARAM_UNSET; 1113c99b42c3SDan Streetman } 1114bae21db8SDan Streetman 1115bae21db8SDan Streetman has_zpool = zpool_has_pool(zswap_zpool_type); 1116bb8b93b5SMaciej S. Szmigiero if (!has_zpool && strcmp(zswap_zpool_type, 1117bb8b93b5SMaciej S. Szmigiero CONFIG_ZSWAP_ZPOOL_DEFAULT)) { 1118f1c54846SDan Streetman pr_err("zpool %s not available, using default %s\n", 1119bb8b93b5SMaciej S. Szmigiero zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT); 1120c99b42c3SDan Streetman param_free_charp(&zswap_zpool_type); 1121bb8b93b5SMaciej S. Szmigiero zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; 1122bae21db8SDan Streetman has_zpool = zpool_has_pool(zswap_zpool_type); 1123f1c54846SDan Streetman } 1124bae21db8SDan Streetman if (!has_zpool) { 1125bae21db8SDan Streetman pr_err("default zpool %s not available\n", 1126bae21db8SDan Streetman zswap_zpool_type); 1127bae21db8SDan Streetman param_free_charp(&zswap_zpool_type); 1128bae21db8SDan Streetman zswap_zpool_type = ZSWAP_PARAM_UNSET; 1129bae21db8SDan Streetman } 1130bae21db8SDan Streetman 1131bae21db8SDan Streetman if (!has_comp || !has_zpool) 1132bae21db8SDan Streetman return NULL; 1133f1c54846SDan Streetman 1134f1c54846SDan Streetman return zswap_pool_create(zswap_zpool_type, zswap_compressor); 1135f1c54846SDan Streetman } 1136f1c54846SDan Streetman 1137f1c54846SDan Streetman static void zswap_pool_destroy(struct zswap_pool *pool) 1138f1c54846SDan Streetman { 1139b8cf32dcSYosry Ahmed int i; 1140b8cf32dcSYosry Ahmed 1141f1c54846SDan Streetman zswap_pool_debug("destroying", pool); 1142f1c54846SDan Streetman 1143b5ba474fSNhat Pham shrinker_free(pool->shrinker); 1144cab7a7e5SSebastian Andrzej Siewior cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); 11451ec3b5feSBarry Song free_percpu(pool->acomp_ctx); 1146a65b0e76SDomenico Cerasuolo list_lru_destroy(&pool->list_lru); 1147a65b0e76SDomenico Cerasuolo 1148a65b0e76SDomenico Cerasuolo spin_lock(&zswap_pools_lock); 1149a65b0e76SDomenico Cerasuolo mem_cgroup_iter_break(NULL, pool->next_shrink); 1150a65b0e76SDomenico Cerasuolo pool->next_shrink = NULL; 1151a65b0e76SDomenico Cerasuolo spin_unlock(&zswap_pools_lock); 1152a65b0e76SDomenico Cerasuolo 1153b8cf32dcSYosry Ahmed for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) 1154b8cf32dcSYosry Ahmed zpool_destroy_pool(pool->zpools[i]); 1155f1c54846SDan Streetman kfree(pool); 1156f1c54846SDan Streetman } 1157f1c54846SDan Streetman 1158f1c54846SDan Streetman static int __must_check zswap_pool_get(struct zswap_pool *pool) 1159f1c54846SDan Streetman { 1160ae3d89a7SDan Streetman if (!pool) 1161ae3d89a7SDan Streetman return 0; 1162ae3d89a7SDan Streetman 1163f1c54846SDan Streetman return kref_get_unless_zero(&pool->kref); 1164f1c54846SDan Streetman } 1165f1c54846SDan Streetman 1166200867afSDan Streetman static void __zswap_pool_release(struct work_struct *work) 1167f1c54846SDan Streetman { 116845190f01SVitaly Wool struct zswap_pool *pool = container_of(work, typeof(*pool), 116945190f01SVitaly Wool release_work); 1170200867afSDan Streetman 1171200867afSDan Streetman synchronize_rcu(); 1172f1c54846SDan Streetman 1173f1c54846SDan Streetman /* nobody should have been able to get a kref... */ 1174f1c54846SDan Streetman WARN_ON(kref_get_unless_zero(&pool->kref)); 1175f1c54846SDan Streetman 1176f1c54846SDan Streetman /* pool is now off zswap_pools list and has no references. */ 1177f1c54846SDan Streetman zswap_pool_destroy(pool); 1178f1c54846SDan Streetman } 1179f1c54846SDan Streetman 1180f1c54846SDan Streetman static void __zswap_pool_empty(struct kref *kref) 1181f1c54846SDan Streetman { 1182f1c54846SDan Streetman struct zswap_pool *pool; 1183f1c54846SDan Streetman 1184f1c54846SDan Streetman pool = container_of(kref, typeof(*pool), kref); 1185f1c54846SDan Streetman 1186f1c54846SDan Streetman spin_lock(&zswap_pools_lock); 1187f1c54846SDan Streetman 1188f1c54846SDan Streetman WARN_ON(pool == zswap_pool_current()); 1189f1c54846SDan Streetman 1190f1c54846SDan Streetman list_del_rcu(&pool->list); 1191200867afSDan Streetman 119245190f01SVitaly Wool INIT_WORK(&pool->release_work, __zswap_pool_release); 119345190f01SVitaly Wool schedule_work(&pool->release_work); 1194f1c54846SDan Streetman 1195f1c54846SDan Streetman spin_unlock(&zswap_pools_lock); 1196f1c54846SDan Streetman } 1197f1c54846SDan Streetman 1198f1c54846SDan Streetman static void zswap_pool_put(struct zswap_pool *pool) 1199f1c54846SDan Streetman { 1200f1c54846SDan Streetman kref_put(&pool->kref, __zswap_pool_empty); 12012b281117SSeth Jennings } 12022b281117SSeth Jennings 12032b281117SSeth Jennings /********************************* 120490b0fc26SDan Streetman * param callbacks 120590b0fc26SDan Streetman **********************************/ 120690b0fc26SDan Streetman 1207141fdeecSLiu Shixin static bool zswap_pool_changed(const char *s, const struct kernel_param *kp) 1208141fdeecSLiu Shixin { 1209141fdeecSLiu Shixin /* no change required */ 1210141fdeecSLiu Shixin if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool) 1211141fdeecSLiu Shixin return false; 1212141fdeecSLiu Shixin return true; 1213141fdeecSLiu Shixin } 1214141fdeecSLiu Shixin 1215c99b42c3SDan Streetman /* val must be a null-terminated string */ 121690b0fc26SDan Streetman static int __zswap_param_set(const char *val, const struct kernel_param *kp, 121790b0fc26SDan Streetman char *type, char *compressor) 121890b0fc26SDan Streetman { 121990b0fc26SDan Streetman struct zswap_pool *pool, *put_pool = NULL; 1220c99b42c3SDan Streetman char *s = strstrip((char *)val); 1221141fdeecSLiu Shixin int ret = 0; 1222141fdeecSLiu Shixin bool new_pool = false; 122390b0fc26SDan Streetman 1224141fdeecSLiu Shixin mutex_lock(&zswap_init_lock); 12259021ccecSLiu Shixin switch (zswap_init_state) { 12269021ccecSLiu Shixin case ZSWAP_UNINIT: 122790b0fc26SDan Streetman /* if this is load-time (pre-init) param setting, 122890b0fc26SDan Streetman * don't create a pool; that's done during init. 122990b0fc26SDan Streetman */ 1230141fdeecSLiu Shixin ret = param_set_charp(s, kp); 1231141fdeecSLiu Shixin break; 12329021ccecSLiu Shixin case ZSWAP_INIT_SUCCEED: 1233141fdeecSLiu Shixin new_pool = zswap_pool_changed(s, kp); 12349021ccecSLiu Shixin break; 12359021ccecSLiu Shixin case ZSWAP_INIT_FAILED: 12369021ccecSLiu Shixin pr_err("can't set param, initialization failed\n"); 1237141fdeecSLiu Shixin ret = -ENODEV; 12389021ccecSLiu Shixin } 1239141fdeecSLiu Shixin mutex_unlock(&zswap_init_lock); 1240141fdeecSLiu Shixin 1241141fdeecSLiu Shixin /* no need to create a new pool, return directly */ 1242141fdeecSLiu Shixin if (!new_pool) 1243141fdeecSLiu Shixin return ret; 124490b0fc26SDan Streetman 124590b0fc26SDan Streetman if (!type) { 1246c99b42c3SDan Streetman if (!zpool_has_pool(s)) { 1247c99b42c3SDan Streetman pr_err("zpool %s not available\n", s); 1248c99b42c3SDan Streetman return -ENOENT; 1249c99b42c3SDan Streetman } 125090b0fc26SDan Streetman type = s; 125190b0fc26SDan Streetman } else if (!compressor) { 12521ec3b5feSBarry Song if (!crypto_has_acomp(s, 0, 0)) { 1253c99b42c3SDan Streetman pr_err("compressor %s not available\n", s); 125490b0fc26SDan Streetman return -ENOENT; 125590b0fc26SDan Streetman } 1256c99b42c3SDan Streetman compressor = s; 1257c99b42c3SDan Streetman } else { 1258c99b42c3SDan Streetman WARN_ON(1); 1259c99b42c3SDan Streetman return -EINVAL; 126090b0fc26SDan Streetman } 126190b0fc26SDan Streetman 126290b0fc26SDan Streetman spin_lock(&zswap_pools_lock); 126390b0fc26SDan Streetman 126490b0fc26SDan Streetman pool = zswap_pool_find_get(type, compressor); 126590b0fc26SDan Streetman if (pool) { 126690b0fc26SDan Streetman zswap_pool_debug("using existing", pool); 1267fd5bb66cSDan Streetman WARN_ON(pool == zswap_pool_current()); 126890b0fc26SDan Streetman list_del_rcu(&pool->list); 126990b0fc26SDan Streetman } 127090b0fc26SDan Streetman 1271fd5bb66cSDan Streetman spin_unlock(&zswap_pools_lock); 1272fd5bb66cSDan Streetman 1273fd5bb66cSDan Streetman if (!pool) 1274fd5bb66cSDan Streetman pool = zswap_pool_create(type, compressor); 1275fd5bb66cSDan Streetman 127690b0fc26SDan Streetman if (pool) 1277c99b42c3SDan Streetman ret = param_set_charp(s, kp); 127890b0fc26SDan Streetman else 127990b0fc26SDan Streetman ret = -EINVAL; 128090b0fc26SDan Streetman 1281fd5bb66cSDan Streetman spin_lock(&zswap_pools_lock); 1282fd5bb66cSDan Streetman 128390b0fc26SDan Streetman if (!ret) { 128490b0fc26SDan Streetman put_pool = zswap_pool_current(); 128590b0fc26SDan Streetman list_add_rcu(&pool->list, &zswap_pools); 1286ae3d89a7SDan Streetman zswap_has_pool = true; 128790b0fc26SDan Streetman } else if (pool) { 128890b0fc26SDan Streetman /* add the possibly pre-existing pool to the end of the pools 128990b0fc26SDan Streetman * list; if it's new (and empty) then it'll be removed and 129090b0fc26SDan Streetman * destroyed by the put after we drop the lock 129190b0fc26SDan Streetman */ 129290b0fc26SDan Streetman list_add_tail_rcu(&pool->list, &zswap_pools); 129390b0fc26SDan Streetman put_pool = pool; 1294fd5bb66cSDan Streetman } 1295fd5bb66cSDan Streetman 1296fd5bb66cSDan Streetman spin_unlock(&zswap_pools_lock); 1297fd5bb66cSDan Streetman 1298fd5bb66cSDan Streetman if (!zswap_has_pool && !pool) { 1299ae3d89a7SDan Streetman /* if initial pool creation failed, and this pool creation also 1300ae3d89a7SDan Streetman * failed, maybe both compressor and zpool params were bad. 1301ae3d89a7SDan Streetman * Allow changing this param, so pool creation will succeed 1302ae3d89a7SDan Streetman * when the other param is changed. We already verified this 13031ec3b5feSBarry Song * param is ok in the zpool_has_pool() or crypto_has_acomp() 1304ae3d89a7SDan Streetman * checks above. 1305ae3d89a7SDan Streetman */ 1306ae3d89a7SDan Streetman ret = param_set_charp(s, kp); 130790b0fc26SDan Streetman } 130890b0fc26SDan Streetman 130990b0fc26SDan Streetman /* drop the ref from either the old current pool, 131090b0fc26SDan Streetman * or the new pool we failed to add 131190b0fc26SDan Streetman */ 131290b0fc26SDan Streetman if (put_pool) 131390b0fc26SDan Streetman zswap_pool_put(put_pool); 131490b0fc26SDan Streetman 131590b0fc26SDan Streetman return ret; 131690b0fc26SDan Streetman } 131790b0fc26SDan Streetman 131890b0fc26SDan Streetman static int zswap_compressor_param_set(const char *val, 131990b0fc26SDan Streetman const struct kernel_param *kp) 132090b0fc26SDan Streetman { 132190b0fc26SDan Streetman return __zswap_param_set(val, kp, zswap_zpool_type, NULL); 132290b0fc26SDan Streetman } 132390b0fc26SDan Streetman 132490b0fc26SDan Streetman static int zswap_zpool_param_set(const char *val, 132590b0fc26SDan Streetman const struct kernel_param *kp) 132690b0fc26SDan Streetman { 132790b0fc26SDan Streetman return __zswap_param_set(val, kp, NULL, zswap_compressor); 132890b0fc26SDan Streetman } 132990b0fc26SDan Streetman 1330d7b028f5SDan Streetman static int zswap_enabled_param_set(const char *val, 1331d7b028f5SDan Streetman const struct kernel_param *kp) 1332d7b028f5SDan Streetman { 1333141fdeecSLiu Shixin int ret = -ENODEV; 1334d7b028f5SDan Streetman 1335141fdeecSLiu Shixin /* if this is load-time (pre-init) param setting, only set param. */ 1336141fdeecSLiu Shixin if (system_state != SYSTEM_RUNNING) 1337d7b028f5SDan Streetman return param_set_bool(val, kp); 1338141fdeecSLiu Shixin 1339141fdeecSLiu Shixin mutex_lock(&zswap_init_lock); 13409021ccecSLiu Shixin switch (zswap_init_state) { 13419021ccecSLiu Shixin case ZSWAP_UNINIT: 1342141fdeecSLiu Shixin if (zswap_setup()) 1343141fdeecSLiu Shixin break; 1344141fdeecSLiu Shixin fallthrough; 13459021ccecSLiu Shixin case ZSWAP_INIT_SUCCEED: 1346141fdeecSLiu Shixin if (!zswap_has_pool) 13479021ccecSLiu Shixin pr_err("can't enable, no pool configured\n"); 1348141fdeecSLiu Shixin else 1349141fdeecSLiu Shixin ret = param_set_bool(val, kp); 1350141fdeecSLiu Shixin break; 13519021ccecSLiu Shixin case ZSWAP_INIT_FAILED: 1352d7b028f5SDan Streetman pr_err("can't enable, initialization failed\n"); 1353d7b028f5SDan Streetman } 1354141fdeecSLiu Shixin mutex_unlock(&zswap_init_lock); 1355141fdeecSLiu Shixin 1356141fdeecSLiu Shixin return ret; 1357d7b028f5SDan Streetman } 1358d7b028f5SDan Streetman 135932acba4cSChengming Zhou static void __zswap_load(struct zswap_entry *entry, struct page *page) 136032acba4cSChengming Zhou { 136132acba4cSChengming Zhou struct zpool *zpool = zswap_find_zpool(entry); 136232acba4cSChengming Zhou struct scatterlist input, output; 136332acba4cSChengming Zhou struct crypto_acomp_ctx *acomp_ctx; 136432acba4cSChengming Zhou u8 *src; 136532acba4cSChengming Zhou 136632acba4cSChengming Zhou acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); 13678ba2f844SChengming Zhou mutex_lock(&acomp_ctx->mutex); 136832acba4cSChengming Zhou 136932acba4cSChengming Zhou src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); 137032acba4cSChengming Zhou if (!zpool_can_sleep_mapped(zpool)) { 13718ba2f844SChengming Zhou memcpy(acomp_ctx->buffer, src, entry->length); 13728ba2f844SChengming Zhou src = acomp_ctx->buffer; 137332acba4cSChengming Zhou zpool_unmap_handle(zpool, entry->handle); 137432acba4cSChengming Zhou } 137532acba4cSChengming Zhou 137632acba4cSChengming Zhou sg_init_one(&input, src, entry->length); 137732acba4cSChengming Zhou sg_init_table(&output, 1); 137832acba4cSChengming Zhou sg_set_page(&output, page, PAGE_SIZE, 0); 137932acba4cSChengming Zhou acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); 138032acba4cSChengming Zhou BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait)); 138132acba4cSChengming Zhou BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE); 13828ba2f844SChengming Zhou mutex_unlock(&acomp_ctx->mutex); 138332acba4cSChengming Zhou 138432acba4cSChengming Zhou if (zpool_can_sleep_mapped(zpool)) 138532acba4cSChengming Zhou zpool_unmap_handle(zpool, entry->handle); 138632acba4cSChengming Zhou } 138732acba4cSChengming Zhou 138890b0fc26SDan Streetman /********************************* 13892b281117SSeth Jennings * writeback code 13902b281117SSeth Jennings **********************************/ 13912b281117SSeth Jennings /* 139296c7b0b4SMatthew Wilcox (Oracle) * Attempts to free an entry by adding a folio to the swap cache, 139396c7b0b4SMatthew Wilcox (Oracle) * decompressing the entry data into the folio, and issuing a 139496c7b0b4SMatthew Wilcox (Oracle) * bio write to write the folio back to the swap device. 13952b281117SSeth Jennings * 139696c7b0b4SMatthew Wilcox (Oracle) * This can be thought of as a "resumed writeback" of the folio 13972b281117SSeth Jennings * to the swap device. We are basically resuming the same swap 139842c06a0eSJohannes Weiner * writeback path that was intercepted with the zswap_store() 139996c7b0b4SMatthew Wilcox (Oracle) * in the first place. After the folio has been decompressed into 14002b281117SSeth Jennings * the swap cache, the compressed version stored by zswap can be 14012b281117SSeth Jennings * freed. 14022b281117SSeth Jennings */ 14030bb48849SDomenico Cerasuolo static int zswap_writeback_entry(struct zswap_entry *entry, 1404ff9d5ba2SDomenico Cerasuolo struct zswap_tree *tree) 14052b281117SSeth Jennings { 14060bb48849SDomenico Cerasuolo swp_entry_t swpentry = entry->swpentry; 140796c7b0b4SMatthew Wilcox (Oracle) struct folio *folio; 1408ddc1a5cbSHugh Dickins struct mempolicy *mpol; 140996c7b0b4SMatthew Wilcox (Oracle) bool folio_was_allocated; 14102b281117SSeth Jennings struct writeback_control wbc = { 14112b281117SSeth Jennings .sync_mode = WB_SYNC_NONE, 14122b281117SSeth Jennings }; 14132b281117SSeth Jennings 141496c7b0b4SMatthew Wilcox (Oracle) /* try to allocate swap cache folio */ 1415ddc1a5cbSHugh Dickins mpol = get_task_policy(current); 141696c7b0b4SMatthew Wilcox (Oracle) folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, 141796c7b0b4SMatthew Wilcox (Oracle) NO_INTERLEAVE_INDEX, &folio_was_allocated, true); 141896c7b0b4SMatthew Wilcox (Oracle) if (!folio) 1419e947ba0bSChengming Zhou return -ENOMEM; 14202b281117SSeth Jennings 1421e947ba0bSChengming Zhou /* 142296c7b0b4SMatthew Wilcox (Oracle) * Found an existing folio, we raced with load/swapin. We generally 142396c7b0b4SMatthew Wilcox (Oracle) * writeback cold folios from zswap, and swapin means the folio just 142496c7b0b4SMatthew Wilcox (Oracle) * became hot. Skip this folio and let the caller find another one. 1425e947ba0bSChengming Zhou */ 142696c7b0b4SMatthew Wilcox (Oracle) if (!folio_was_allocated) { 142796c7b0b4SMatthew Wilcox (Oracle) folio_put(folio); 1428e947ba0bSChengming Zhou return -EEXIST; 142998804a94SJohannes Weiner } 14302b281117SSeth Jennings 143104fc7816SDomenico Cerasuolo /* 143296c7b0b4SMatthew Wilcox (Oracle) * folio is locked, and the swapcache is now secured against 143398804a94SJohannes Weiner * concurrent swapping to and from the slot. Verify that the 143498804a94SJohannes Weiner * swap entry hasn't been invalidated and recycled behind our 143598804a94SJohannes Weiner * backs (our zswap_entry reference doesn't prevent that), to 143696c7b0b4SMatthew Wilcox (Oracle) * avoid overwriting a new swap folio with old compressed data. 143704fc7816SDomenico Cerasuolo */ 143804fc7816SDomenico Cerasuolo spin_lock(&tree->lock); 14390bb48849SDomenico Cerasuolo if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { 144004fc7816SDomenico Cerasuolo spin_unlock(&tree->lock); 144196c7b0b4SMatthew Wilcox (Oracle) delete_from_swap_cache(folio); 1442e3b63e96SYosry Ahmed folio_unlock(folio); 1443e3b63e96SYosry Ahmed folio_put(folio); 1444e947ba0bSChengming Zhou return -ENOMEM; 144504fc7816SDomenico Cerasuolo } 144604fc7816SDomenico Cerasuolo spin_unlock(&tree->lock); 144704fc7816SDomenico Cerasuolo 144896c7b0b4SMatthew Wilcox (Oracle) __zswap_load(entry, &folio->page); 14492b281117SSeth Jennings 145096c7b0b4SMatthew Wilcox (Oracle) /* folio is up to date */ 145196c7b0b4SMatthew Wilcox (Oracle) folio_mark_uptodate(folio); 14522b281117SSeth Jennings 1453b349acc7SWeijie Yang /* move it to the tail of the inactive list after end_writeback */ 145496c7b0b4SMatthew Wilcox (Oracle) folio_set_reclaim(folio); 1455b349acc7SWeijie Yang 14562b281117SSeth Jennings /* start writeback */ 1457b99b4e0dSMatthew Wilcox (Oracle) __swap_writepage(folio, &wbc); 145896c7b0b4SMatthew Wilcox (Oracle) folio_put(folio); 14592b281117SSeth Jennings 1460e947ba0bSChengming Zhou return 0; 14612b281117SSeth Jennings } 14622b281117SSeth Jennings 1463a85f878bSSrividya Desireddy static int zswap_is_page_same_filled(void *ptr, unsigned long *value) 1464a85f878bSSrividya Desireddy { 1465a85f878bSSrividya Desireddy unsigned long *page; 146662bf1258STaejoon Song unsigned long val; 146762bf1258STaejoon Song unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 1468a85f878bSSrividya Desireddy 1469a85f878bSSrividya Desireddy page = (unsigned long *)ptr; 147062bf1258STaejoon Song val = page[0]; 147162bf1258STaejoon Song 147262bf1258STaejoon Song if (val != page[last_pos]) 147362bf1258STaejoon Song return 0; 147462bf1258STaejoon Song 147562bf1258STaejoon Song for (pos = 1; pos < last_pos; pos++) { 147662bf1258STaejoon Song if (val != page[pos]) 1477a85f878bSSrividya Desireddy return 0; 1478a85f878bSSrividya Desireddy } 147962bf1258STaejoon Song 148062bf1258STaejoon Song *value = val; 148162bf1258STaejoon Song 1482a85f878bSSrividya Desireddy return 1; 1483a85f878bSSrividya Desireddy } 1484a85f878bSSrividya Desireddy 1485a85f878bSSrividya Desireddy static void zswap_fill_page(void *ptr, unsigned long value) 1486a85f878bSSrividya Desireddy { 1487a85f878bSSrividya Desireddy unsigned long *page; 1488a85f878bSSrividya Desireddy 1489a85f878bSSrividya Desireddy page = (unsigned long *)ptr; 1490a85f878bSSrividya Desireddy memset_l(page, value, PAGE_SIZE / sizeof(unsigned long)); 1491a85f878bSSrividya Desireddy } 1492a85f878bSSrividya Desireddy 149334f4c198SMatthew Wilcox (Oracle) bool zswap_store(struct folio *folio) 14942b281117SSeth Jennings { 14953d2c9087SDavid Hildenbrand swp_entry_t swp = folio->swap; 149642c06a0eSJohannes Weiner int type = swp_type(swp); 149742c06a0eSJohannes Weiner pgoff_t offset = swp_offset(swp); 149834f4c198SMatthew Wilcox (Oracle) struct page *page = &folio->page; 14992b281117SSeth Jennings struct zswap_tree *tree = zswap_trees[type]; 15002b281117SSeth Jennings struct zswap_entry *entry, *dupentry; 15011ec3b5feSBarry Song struct scatterlist input, output; 15021ec3b5feSBarry Song struct crypto_acomp_ctx *acomp_ctx; 1503f4840ccfSJohannes Weiner struct obj_cgroup *objcg = NULL; 1504a65b0e76SDomenico Cerasuolo struct mem_cgroup *memcg = NULL; 1505f4840ccfSJohannes Weiner struct zswap_pool *pool; 1506b8cf32dcSYosry Ahmed struct zpool *zpool; 15070bb48849SDomenico Cerasuolo unsigned int dlen = PAGE_SIZE; 1508a85f878bSSrividya Desireddy unsigned long handle, value; 15092b281117SSeth Jennings char *buf; 15102b281117SSeth Jennings u8 *src, *dst; 1511d2fcd82bSHui Zhu gfp_t gfp; 151242c06a0eSJohannes Weiner int ret; 151342c06a0eSJohannes Weiner 151434f4c198SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_locked(folio)); 151534f4c198SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); 15162b281117SSeth Jennings 151734f4c198SMatthew Wilcox (Oracle) /* Large folios aren't supported */ 151834f4c198SMatthew Wilcox (Oracle) if (folio_test_large(folio)) 151942c06a0eSJohannes Weiner return false; 15207ba71669SHuang Ying 15210bdf0efaSNhat Pham /* 1522ca56489cSDomenico Cerasuolo * If this is a duplicate, it must be removed before attempting to store 1523ca56489cSDomenico Cerasuolo * it, otherwise, if the store fails the old page won't be removed from 1524ca56489cSDomenico Cerasuolo * the tree, and it might be written back overriding the new data. 1525ca56489cSDomenico Cerasuolo */ 1526ca56489cSDomenico Cerasuolo spin_lock(&tree->lock); 1527ca56489cSDomenico Cerasuolo dupentry = zswap_rb_search(&tree->rbroot, offset); 1528ca56489cSDomenico Cerasuolo if (dupentry) { 1529ca56489cSDomenico Cerasuolo zswap_duplicate_entry++; 1530ca56489cSDomenico Cerasuolo zswap_invalidate_entry(tree, dupentry); 1531ca56489cSDomenico Cerasuolo } 1532ca56489cSDomenico Cerasuolo spin_unlock(&tree->lock); 1533678e54d4SChengming Zhou 1534678e54d4SChengming Zhou if (!zswap_enabled) 1535678e54d4SChengming Zhou return false; 1536678e54d4SChengming Zhou 1537074e3e26SMatthew Wilcox (Oracle) objcg = get_obj_cgroup_from_folio(folio); 1538a65b0e76SDomenico Cerasuolo if (objcg && !obj_cgroup_may_zswap(objcg)) { 1539a65b0e76SDomenico Cerasuolo memcg = get_mem_cgroup_from_objcg(objcg); 1540a65b0e76SDomenico Cerasuolo if (shrink_memcg(memcg)) { 1541a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 15420bdf0efaSNhat Pham goto reject; 1543a65b0e76SDomenico Cerasuolo } 1544a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1545a65b0e76SDomenico Cerasuolo } 1546f4840ccfSJohannes Weiner 15472b281117SSeth Jennings /* reclaim space if needed */ 15482b281117SSeth Jennings if (zswap_is_full()) { 15492b281117SSeth Jennings zswap_pool_limit_hit++; 155045190f01SVitaly Wool zswap_pool_reached_full = true; 1551f4840ccfSJohannes Weiner goto shrink; 15522b281117SSeth Jennings } 155316e536efSLi Wang 155445190f01SVitaly Wool if (zswap_pool_reached_full) { 155542c06a0eSJohannes Weiner if (!zswap_can_accept()) 1556e0228d59SDomenico Cerasuolo goto shrink; 155742c06a0eSJohannes Weiner else 155845190f01SVitaly Wool zswap_pool_reached_full = false; 15592b281117SSeth Jennings } 15602b281117SSeth Jennings 15612b281117SSeth Jennings /* allocate entry */ 1562a65b0e76SDomenico Cerasuolo entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page)); 15632b281117SSeth Jennings if (!entry) { 15642b281117SSeth Jennings zswap_reject_kmemcache_fail++; 15652b281117SSeth Jennings goto reject; 15662b281117SSeth Jennings } 15672b281117SSeth Jennings 1568a85f878bSSrividya Desireddy if (zswap_same_filled_pages_enabled) { 1569003ae2fbSFabio M. De Francesco src = kmap_local_page(page); 1570a85f878bSSrividya Desireddy if (zswap_is_page_same_filled(src, &value)) { 1571003ae2fbSFabio M. De Francesco kunmap_local(src); 15720bb48849SDomenico Cerasuolo entry->swpentry = swp_entry(type, offset); 1573a85f878bSSrividya Desireddy entry->length = 0; 1574a85f878bSSrividya Desireddy entry->value = value; 1575a85f878bSSrividya Desireddy atomic_inc(&zswap_same_filled_pages); 1576a85f878bSSrividya Desireddy goto insert_entry; 1577a85f878bSSrividya Desireddy } 1578003ae2fbSFabio M. De Francesco kunmap_local(src); 1579a85f878bSSrividya Desireddy } 1580a85f878bSSrividya Desireddy 158142c06a0eSJohannes Weiner if (!zswap_non_same_filled_pages_enabled) 1582cb325dddSMaciej S. Szmigiero goto freepage; 1583cb325dddSMaciej S. Szmigiero 1584f1c54846SDan Streetman /* if entry is successfully added, it keeps the reference */ 1585f1c54846SDan Streetman entry->pool = zswap_pool_current_get(); 158642c06a0eSJohannes Weiner if (!entry->pool) 15872b281117SSeth Jennings goto freepage; 15882b281117SSeth Jennings 1589a65b0e76SDomenico Cerasuolo if (objcg) { 1590a65b0e76SDomenico Cerasuolo memcg = get_mem_cgroup_from_objcg(objcg); 1591a65b0e76SDomenico Cerasuolo if (memcg_list_lru_alloc(memcg, &entry->pool->list_lru, GFP_KERNEL)) { 1592a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1593a65b0e76SDomenico Cerasuolo goto put_pool; 1594a65b0e76SDomenico Cerasuolo } 1595a65b0e76SDomenico Cerasuolo mem_cgroup_put(memcg); 1596a65b0e76SDomenico Cerasuolo } 1597a65b0e76SDomenico Cerasuolo 1598f1c54846SDan Streetman /* compress */ 15991ec3b5feSBarry Song acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); 16001ec3b5feSBarry Song 16018ba2f844SChengming Zhou mutex_lock(&acomp_ctx->mutex); 16021ec3b5feSBarry Song 16038ba2f844SChengming Zhou dst = acomp_ctx->buffer; 16041ec3b5feSBarry Song sg_init_table(&input, 1); 160596c7b0b4SMatthew Wilcox (Oracle) sg_set_page(&input, &folio->page, PAGE_SIZE, 0); 16061ec3b5feSBarry Song 16078ba2f844SChengming Zhou /* 16088ba2f844SChengming Zhou * We need PAGE_SIZE * 2 here since there maybe over-compression case, 16098ba2f844SChengming Zhou * and hardware-accelerators may won't check the dst buffer size, so 16108ba2f844SChengming Zhou * giving the dst buffer with enough length to avoid buffer overflow. 16118ba2f844SChengming Zhou */ 16121ec3b5feSBarry Song sg_init_one(&output, dst, PAGE_SIZE * 2); 16131ec3b5feSBarry Song acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); 16141ec3b5feSBarry Song /* 16151ec3b5feSBarry Song * it maybe looks a little bit silly that we send an asynchronous request, 16161ec3b5feSBarry Song * then wait for its completion synchronously. This makes the process look 16171ec3b5feSBarry Song * synchronous in fact. 16181ec3b5feSBarry Song * Theoretically, acomp supports users send multiple acomp requests in one 16191ec3b5feSBarry Song * acomp instance, then get those requests done simultaneously. but in this 162042c06a0eSJohannes Weiner * case, zswap actually does store and load page by page, there is no 16211ec3b5feSBarry Song * existing method to send the second page before the first page is done 162242c06a0eSJohannes Weiner * in one thread doing zwap. 16231ec3b5feSBarry Song * but in different threads running on different cpu, we have different 16241ec3b5feSBarry Song * acomp instance, so multiple threads can do (de)compression in parallel. 16251ec3b5feSBarry Song */ 16261ec3b5feSBarry Song ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); 16271ec3b5feSBarry Song dlen = acomp_ctx->req->dlen; 16281ec3b5feSBarry Song 1629cb61dad8SNhat Pham if (ret) { 1630cb61dad8SNhat Pham zswap_reject_compress_fail++; 1631f1c54846SDan Streetman goto put_dstmem; 1632cb61dad8SNhat Pham } 1633f1c54846SDan Streetman 16342b281117SSeth Jennings /* store */ 1635b8cf32dcSYosry Ahmed zpool = zswap_find_zpool(entry); 1636d2fcd82bSHui Zhu gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; 1637b8cf32dcSYosry Ahmed if (zpool_malloc_support_movable(zpool)) 1638d2fcd82bSHui Zhu gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; 1639b8cf32dcSYosry Ahmed ret = zpool_malloc(zpool, dlen, gfp, &handle); 16402b281117SSeth Jennings if (ret == -ENOSPC) { 16412b281117SSeth Jennings zswap_reject_compress_poor++; 1642f1c54846SDan Streetman goto put_dstmem; 16432b281117SSeth Jennings } 16442b281117SSeth Jennings if (ret) { 16452b281117SSeth Jennings zswap_reject_alloc_fail++; 1646f1c54846SDan Streetman goto put_dstmem; 16472b281117SSeth Jennings } 1648b8cf32dcSYosry Ahmed buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO); 16490bb48849SDomenico Cerasuolo memcpy(buf, dst, dlen); 1650b8cf32dcSYosry Ahmed zpool_unmap_handle(zpool, handle); 16518ba2f844SChengming Zhou mutex_unlock(&acomp_ctx->mutex); 16522b281117SSeth Jennings 16532b281117SSeth Jennings /* populate entry */ 16540bb48849SDomenico Cerasuolo entry->swpentry = swp_entry(type, offset); 16552b281117SSeth Jennings entry->handle = handle; 16562b281117SSeth Jennings entry->length = dlen; 16572b281117SSeth Jennings 1658a85f878bSSrividya Desireddy insert_entry: 1659f4840ccfSJohannes Weiner entry->objcg = objcg; 1660f4840ccfSJohannes Weiner if (objcg) { 1661f4840ccfSJohannes Weiner obj_cgroup_charge_zswap(objcg, entry->length); 1662f4840ccfSJohannes Weiner /* Account before objcg ref is moved to tree */ 1663f4840ccfSJohannes Weiner count_objcg_event(objcg, ZSWPOUT); 1664f4840ccfSJohannes Weiner } 1665f4840ccfSJohannes Weiner 16662b281117SSeth Jennings /* map */ 16672b281117SSeth Jennings spin_lock(&tree->lock); 1668ca56489cSDomenico Cerasuolo /* 1669ca56489cSDomenico Cerasuolo * A duplicate entry should have been removed at the beginning of this 1670ca56489cSDomenico Cerasuolo * function. Since the swap entry should be pinned, if a duplicate is 1671ca56489cSDomenico Cerasuolo * found again here it means that something went wrong in the swap 1672ca56489cSDomenico Cerasuolo * cache. 1673ca56489cSDomenico Cerasuolo */ 167442c06a0eSJohannes Weiner while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) { 1675ca56489cSDomenico Cerasuolo WARN_ON(1); 16762b281117SSeth Jennings zswap_duplicate_entry++; 167756c67049SJohannes Weiner zswap_invalidate_entry(tree, dupentry); 16782b281117SSeth Jennings } 167935499e2bSDomenico Cerasuolo if (entry->length) { 1680a65b0e76SDomenico Cerasuolo INIT_LIST_HEAD(&entry->lru); 1681a65b0e76SDomenico Cerasuolo zswap_lru_add(&entry->pool->list_lru, entry); 1682b5ba474fSNhat Pham atomic_inc(&entry->pool->nr_stored); 1683f999f38bSDomenico Cerasuolo } 16842b281117SSeth Jennings spin_unlock(&tree->lock); 16852b281117SSeth Jennings 16862b281117SSeth Jennings /* update stats */ 16872b281117SSeth Jennings atomic_inc(&zswap_stored_pages); 1688f1c54846SDan Streetman zswap_update_total_size(); 1689f6498b77SJohannes Weiner count_vm_event(ZSWPOUT); 16902b281117SSeth Jennings 169142c06a0eSJohannes Weiner return true; 16922b281117SSeth Jennings 1693f1c54846SDan Streetman put_dstmem: 16948ba2f844SChengming Zhou mutex_unlock(&acomp_ctx->mutex); 1695a65b0e76SDomenico Cerasuolo put_pool: 1696f1c54846SDan Streetman zswap_pool_put(entry->pool); 1697f1c54846SDan Streetman freepage: 16982b281117SSeth Jennings zswap_entry_cache_free(entry); 16992b281117SSeth Jennings reject: 1700f4840ccfSJohannes Weiner if (objcg) 1701f4840ccfSJohannes Weiner obj_cgroup_put(objcg); 170242c06a0eSJohannes Weiner return false; 1703f4840ccfSJohannes Weiner 1704f4840ccfSJohannes Weiner shrink: 1705f4840ccfSJohannes Weiner pool = zswap_pool_last_get(); 1706969d63e1SJohannes Weiner if (pool && !queue_work(shrink_wq, &pool->shrink_work)) 1707969d63e1SJohannes Weiner zswap_pool_put(pool); 1708f4840ccfSJohannes Weiner goto reject; 17092b281117SSeth Jennings } 17102b281117SSeth Jennings 1711ca54f6d8SMatthew Wilcox (Oracle) bool zswap_load(struct folio *folio) 17122b281117SSeth Jennings { 17133d2c9087SDavid Hildenbrand swp_entry_t swp = folio->swap; 171442c06a0eSJohannes Weiner int type = swp_type(swp); 171542c06a0eSJohannes Weiner pgoff_t offset = swp_offset(swp); 1716ca54f6d8SMatthew Wilcox (Oracle) struct page *page = &folio->page; 17172b281117SSeth Jennings struct zswap_tree *tree = zswap_trees[type]; 17182b281117SSeth Jennings struct zswap_entry *entry; 171932acba4cSChengming Zhou u8 *dst; 172042c06a0eSJohannes Weiner 1721ca54f6d8SMatthew Wilcox (Oracle) VM_WARN_ON_ONCE(!folio_test_locked(folio)); 17222b281117SSeth Jennings 17232b281117SSeth Jennings /* find */ 17242b281117SSeth Jennings spin_lock(&tree->lock); 17250ab0abcfSWeijie Yang entry = zswap_entry_find_get(&tree->rbroot, offset); 17262b281117SSeth Jennings if (!entry) { 17272b281117SSeth Jennings spin_unlock(&tree->lock); 172842c06a0eSJohannes Weiner return false; 17292b281117SSeth Jennings } 17302b281117SSeth Jennings spin_unlock(&tree->lock); 17312b281117SSeth Jennings 173266447fd0SChengming Zhou if (entry->length) 173366447fd0SChengming Zhou __zswap_load(entry, page); 173466447fd0SChengming Zhou else { 1735003ae2fbSFabio M. De Francesco dst = kmap_local_page(page); 1736a85f878bSSrividya Desireddy zswap_fill_page(dst, entry->value); 1737003ae2fbSFabio M. De Francesco kunmap_local(dst); 1738a85f878bSSrividya Desireddy } 1739a85f878bSSrividya Desireddy 1740f6498b77SJohannes Weiner count_vm_event(ZSWPIN); 1741f4840ccfSJohannes Weiner if (entry->objcg) 1742f4840ccfSJohannes Weiner count_objcg_event(entry->objcg, ZSWPIN); 1743c75f5c1eSChengming Zhou 17442b281117SSeth Jennings spin_lock(&tree->lock); 174566447fd0SChengming Zhou if (zswap_exclusive_loads_enabled) { 1746b9c91c43SYosry Ahmed zswap_invalidate_entry(tree, entry); 1747ca54f6d8SMatthew Wilcox (Oracle) folio_mark_dirty(folio); 174835499e2bSDomenico Cerasuolo } else if (entry->length) { 1749a65b0e76SDomenico Cerasuolo zswap_lru_del(&entry->pool->list_lru, entry); 1750a65b0e76SDomenico Cerasuolo zswap_lru_add(&entry->pool->list_lru, entry); 1751b9c91c43SYosry Ahmed } 175218a93707SYosry Ahmed zswap_entry_put(tree, entry); 17532b281117SSeth Jennings spin_unlock(&tree->lock); 17542b281117SSeth Jennings 175566447fd0SChengming Zhou return true; 17562b281117SSeth Jennings } 17572b281117SSeth Jennings 175842c06a0eSJohannes Weiner void zswap_invalidate(int type, pgoff_t offset) 17592b281117SSeth Jennings { 17602b281117SSeth Jennings struct zswap_tree *tree = zswap_trees[type]; 17612b281117SSeth Jennings struct zswap_entry *entry; 17622b281117SSeth Jennings 17632b281117SSeth Jennings /* find */ 17642b281117SSeth Jennings spin_lock(&tree->lock); 17652b281117SSeth Jennings entry = zswap_rb_search(&tree->rbroot, offset); 17662b281117SSeth Jennings if (!entry) { 17672b281117SSeth Jennings /* entry was written back */ 17682b281117SSeth Jennings spin_unlock(&tree->lock); 17692b281117SSeth Jennings return; 17702b281117SSeth Jennings } 1771b9c91c43SYosry Ahmed zswap_invalidate_entry(tree, entry); 17722b281117SSeth Jennings spin_unlock(&tree->lock); 17732b281117SSeth Jennings } 17742b281117SSeth Jennings 1775*bb29fd77SChengming Zhou int zswap_swapon(int type) 177642c06a0eSJohannes Weiner { 177742c06a0eSJohannes Weiner struct zswap_tree *tree; 177842c06a0eSJohannes Weiner 177942c06a0eSJohannes Weiner tree = kzalloc(sizeof(*tree), GFP_KERNEL); 178042c06a0eSJohannes Weiner if (!tree) { 178142c06a0eSJohannes Weiner pr_err("alloc failed, zswap disabled for swap type %d\n", type); 1782*bb29fd77SChengming Zhou return -ENOMEM; 178342c06a0eSJohannes Weiner } 178442c06a0eSJohannes Weiner 178542c06a0eSJohannes Weiner tree->rbroot = RB_ROOT; 178642c06a0eSJohannes Weiner spin_lock_init(&tree->lock); 178742c06a0eSJohannes Weiner zswap_trees[type] = tree; 1788*bb29fd77SChengming Zhou return 0; 178942c06a0eSJohannes Weiner } 179042c06a0eSJohannes Weiner 179142c06a0eSJohannes Weiner void zswap_swapoff(int type) 17922b281117SSeth Jennings { 17932b281117SSeth Jennings struct zswap_tree *tree = zswap_trees[type]; 17940bd42136SCody P Schafer struct zswap_entry *entry, *n; 17952b281117SSeth Jennings 17962b281117SSeth Jennings if (!tree) 17972b281117SSeth Jennings return; 17982b281117SSeth Jennings 17992b281117SSeth Jennings /* walk the tree and free everything */ 18002b281117SSeth Jennings spin_lock(&tree->lock); 18010ab0abcfSWeijie Yang rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) 180260105e12SMinchan Kim zswap_free_entry(entry); 18032b281117SSeth Jennings tree->rbroot = RB_ROOT; 18042b281117SSeth Jennings spin_unlock(&tree->lock); 1805aa9bca05SWeijie Yang kfree(tree); 1806aa9bca05SWeijie Yang zswap_trees[type] = NULL; 18072b281117SSeth Jennings } 18082b281117SSeth Jennings 18092b281117SSeth Jennings /********************************* 18102b281117SSeth Jennings * debugfs functions 18112b281117SSeth Jennings **********************************/ 18122b281117SSeth Jennings #ifdef CONFIG_DEBUG_FS 18132b281117SSeth Jennings #include <linux/debugfs.h> 18142b281117SSeth Jennings 18152b281117SSeth Jennings static struct dentry *zswap_debugfs_root; 18162b281117SSeth Jennings 1817141fdeecSLiu Shixin static int zswap_debugfs_init(void) 18182b281117SSeth Jennings { 18192b281117SSeth Jennings if (!debugfs_initialized()) 18202b281117SSeth Jennings return -ENODEV; 18212b281117SSeth Jennings 18222b281117SSeth Jennings zswap_debugfs_root = debugfs_create_dir("zswap", NULL); 18232b281117SSeth Jennings 18240825a6f9SJoe Perches debugfs_create_u64("pool_limit_hit", 0444, 18252b281117SSeth Jennings zswap_debugfs_root, &zswap_pool_limit_hit); 18260825a6f9SJoe Perches debugfs_create_u64("reject_reclaim_fail", 0444, 18272b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_reclaim_fail); 18280825a6f9SJoe Perches debugfs_create_u64("reject_alloc_fail", 0444, 18292b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_alloc_fail); 18300825a6f9SJoe Perches debugfs_create_u64("reject_kmemcache_fail", 0444, 18312b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_kmemcache_fail); 1832cb61dad8SNhat Pham debugfs_create_u64("reject_compress_fail", 0444, 1833cb61dad8SNhat Pham zswap_debugfs_root, &zswap_reject_compress_fail); 18340825a6f9SJoe Perches debugfs_create_u64("reject_compress_poor", 0444, 18352b281117SSeth Jennings zswap_debugfs_root, &zswap_reject_compress_poor); 18360825a6f9SJoe Perches debugfs_create_u64("written_back_pages", 0444, 18372b281117SSeth Jennings zswap_debugfs_root, &zswap_written_back_pages); 18380825a6f9SJoe Perches debugfs_create_u64("duplicate_entry", 0444, 18392b281117SSeth Jennings zswap_debugfs_root, &zswap_duplicate_entry); 18400825a6f9SJoe Perches debugfs_create_u64("pool_total_size", 0444, 184112d79d64SDan Streetman zswap_debugfs_root, &zswap_pool_total_size); 18420825a6f9SJoe Perches debugfs_create_atomic_t("stored_pages", 0444, 18432b281117SSeth Jennings zswap_debugfs_root, &zswap_stored_pages); 1844a85f878bSSrividya Desireddy debugfs_create_atomic_t("same_filled_pages", 0444, 1845a85f878bSSrividya Desireddy zswap_debugfs_root, &zswap_same_filled_pages); 18462b281117SSeth Jennings 18472b281117SSeth Jennings return 0; 18482b281117SSeth Jennings } 18492b281117SSeth Jennings #else 1850141fdeecSLiu Shixin static int zswap_debugfs_init(void) 18512b281117SSeth Jennings { 18522b281117SSeth Jennings return 0; 18532b281117SSeth Jennings } 18542b281117SSeth Jennings #endif 18552b281117SSeth Jennings 18562b281117SSeth Jennings /********************************* 18572b281117SSeth Jennings * module init and exit 18582b281117SSeth Jennings **********************************/ 1859141fdeecSLiu Shixin static int zswap_setup(void) 18602b281117SSeth Jennings { 1861f1c54846SDan Streetman struct zswap_pool *pool; 1862ad7ed770SSebastian Andrzej Siewior int ret; 186360105e12SMinchan Kim 1864b7919122SLiu Shixin zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); 1865b7919122SLiu Shixin if (!zswap_entry_cache) { 18662b281117SSeth Jennings pr_err("entry cache creation failed\n"); 1867f1c54846SDan Streetman goto cache_fail; 18682b281117SSeth Jennings } 1869f1c54846SDan Streetman 1870cab7a7e5SSebastian Andrzej Siewior ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, 1871cab7a7e5SSebastian Andrzej Siewior "mm/zswap_pool:prepare", 1872cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_prepare, 1873cab7a7e5SSebastian Andrzej Siewior zswap_cpu_comp_dead); 1874cab7a7e5SSebastian Andrzej Siewior if (ret) 1875cab7a7e5SSebastian Andrzej Siewior goto hp_fail; 1876cab7a7e5SSebastian Andrzej Siewior 1877f1c54846SDan Streetman pool = __zswap_pool_create_fallback(); 1878ae3d89a7SDan Streetman if (pool) { 1879f1c54846SDan Streetman pr_info("loaded using pool %s/%s\n", pool->tfm_name, 1880b8cf32dcSYosry Ahmed zpool_get_type(pool->zpools[0])); 1881f1c54846SDan Streetman list_add(&pool->list, &zswap_pools); 1882ae3d89a7SDan Streetman zswap_has_pool = true; 1883ae3d89a7SDan Streetman } else { 1884ae3d89a7SDan Streetman pr_err("pool creation failed\n"); 1885ae3d89a7SDan Streetman zswap_enabled = false; 1886ae3d89a7SDan Streetman } 188760105e12SMinchan Kim 18888409a385SRonald Monthero shrink_wq = alloc_workqueue("zswap-shrink", 18898409a385SRonald Monthero WQ_UNBOUND|WQ_MEM_RECLAIM, 1); 189045190f01SVitaly Wool if (!shrink_wq) 189145190f01SVitaly Wool goto fallback_fail; 189245190f01SVitaly Wool 18932b281117SSeth Jennings if (zswap_debugfs_init()) 18942b281117SSeth Jennings pr_warn("debugfs initialization failed\n"); 18959021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_SUCCEED; 18962b281117SSeth Jennings return 0; 1897f1c54846SDan Streetman 189845190f01SVitaly Wool fallback_fail: 189938aeb071SDan Carpenter if (pool) 190045190f01SVitaly Wool zswap_pool_destroy(pool); 1901cab7a7e5SSebastian Andrzej Siewior hp_fail: 1902b7919122SLiu Shixin kmem_cache_destroy(zswap_entry_cache); 1903f1c54846SDan Streetman cache_fail: 1904d7b028f5SDan Streetman /* if built-in, we aren't unloaded on failure; don't allow use */ 19059021ccecSLiu Shixin zswap_init_state = ZSWAP_INIT_FAILED; 1906d7b028f5SDan Streetman zswap_enabled = false; 19072b281117SSeth Jennings return -ENOMEM; 19082b281117SSeth Jennings } 1909141fdeecSLiu Shixin 1910141fdeecSLiu Shixin static int __init zswap_init(void) 1911141fdeecSLiu Shixin { 1912141fdeecSLiu Shixin if (!zswap_enabled) 1913141fdeecSLiu Shixin return 0; 1914141fdeecSLiu Shixin return zswap_setup(); 1915141fdeecSLiu Shixin } 19162b281117SSeth Jennings /* must be late so crypto has time to come up */ 1917141fdeecSLiu Shixin late_initcall(zswap_init); 19182b281117SSeth Jennings 191968386da8SSeth Jennings MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>"); 19202b281117SSeth Jennings MODULE_DESCRIPTION("Compressed cache for swap pages"); 1921