// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a cache that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempolicy.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>
#include <linux/zswap.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>

#include "swap.h"
#include "internal.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
*/

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Store failed due to compression algorithm failure */
static u64 zswap_reject_compress_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET ""

static int zswap_setup(void);

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

static bool zswap_exclusive_loads_enabled = IS_ENABLED(
		CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);

/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32

/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *dstmem;
	struct mutex *mutex;
};

/*
 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
 * The only case where lru_lock is not acquired while holding tree.lock is
 * when a zswap_entry is taken off the lru for writeback, in that case it
 * needs to be verified that it's still valid in the tree.
 */
struct zswap_pool {
	struct zpool *zpools[ZSWAP_NR_ZPOOLS];
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
	struct list_head lru;
	spinlock_t lru_lock;
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * swpentry - associated swap entry, the offset indexes into the red-black tree
 * refcount - the number of outstanding references to the entry. This is needed
 *            to protect against premature freeing of the entry by concurrent
 *            calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression. For a same-value filled page length is 0, and both
 *          pool and lru are invalid and must be ignored.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - the value that a same-value filled page is filled with
 * objcg - the obj_cgroup that the compressed memory is charged to
 * lru - handle to the pool's lru used to evict pages.
 */
struct zswap_entry {
	struct rb_node rbnode;
	swp_entry_t swpentry;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
	struct list_head lru;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

enum zswap_init_type {
	ZSWAP_UNINIT,
	ZSWAP_INIT_SUCCEED,
	ZSWAP_INIT_FAILED
};

static enum zswap_init_type zswap_init_state;

/* used to ensure the integrity of initialization */
static DEFINE_MUTEX(zswap_init_lock);

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpools[0]))

static int zswap_writeback_entry(struct zswap_entry *entry,
				 struct zswap_tree *tree);
static int zswap_pool_get(struct zswap_pool *pool);
static void zswap_pool_put(struct zswap_pool *pool);

static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
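
/*
 * A rough worked example for the two checks above, assuming the default
 * tunables (max_pool_percent = 20, accept_thr_percent = 90) and a
 * hypothetical machine with 16 GiB of RAM: zswap_is_full() reports the
 * pool as full once the compressed data exceeds about 3.2 GiB (20% of
 * RAM), and after that zswap_can_accept() only allows new stores again
 * once usage drops below 90% of that limit, i.e. roughly 2.88 GiB,
 * which gives the shrinker some hysteresis to work with.
 */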

static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;
	int i;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
			total += zpool_get_total_size(pool->zpools[i]);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;
	pgoff_t entry_offset;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		entry_offset = swp_offset(entry->swpentry);
		if (entry_offset > offset)
			node = node->rb_left;
		else if (entry_offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;
	pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		myentry_offset = swp_offset(myentry->swpentry);
		if (myentry_offset > entry_offset)
			link = &(*link)->rb_left;
		else if (myentry_offset < entry_offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
		return true;
	}
	return false;
}

static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
{
	int i = 0;

	if (ZSWAP_NR_ZPOOLS > 1)
		i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS));

	return entry->pool->zpools[i];
}

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	if (entry->objcg) {
		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
		obj_cgroup_put(entry->objcg);
	}
	if (!entry->length)
		atomic_dec(&zswap_same_filled_pages);
	else {
		spin_lock(&entry->pool->lru_lock);
		list_del(&entry->lru);
		spin_unlock(&entry->pool->lru_lock);
		zpool_free(zswap_find_zpool(entry), entry->handle);
		zswap_pool_put(entry->pool);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/* caller must hold the tree lock
 * remove from the tree and free it, if nobody references the entry
 */
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	WARN_ON_ONCE(refcount < 0);
	if (refcount == 0) {
		WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);
/*
 * If users dynamically change the zpool type and compressor at runtime, i.e.
 * zswap is running, zswap can have more than one zpool on one cpu, but they
 * are sharing dstmem.  So we need this mutex to be per-cpu.
 */
static DEFINE_PER_CPU(struct mutex *, zswap_mutex);

static int zswap_dstmem_prepare(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!dst)
		return -ENOMEM;

	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
	if (!mutex) {
		kfree(dst);
		return -ENOMEM;
	}

	mutex_init(mutex);
	per_cpu(zswap_dstmem, cpu) = dst;
	per_cpu(zswap_mutex, cpu) = mutex;
	return 0;
}

static int zswap_dstmem_dead(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	mutex = per_cpu(zswap_mutex, cpu);
	kfree(mutex);
	per_cpu(zswap_mutex, cpu) = NULL;

	dst = per_cpu(zswap_dstmem, cpu);
	kfree(dst);
	per_cpu(zswap_dstmem, cpu) = NULL;

	return 0;
}

static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
				pool->tfm_name, PTR_ERR(acomp));
		return PTR_ERR(acomp);
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		crypto_free_acomp(acomp_ctx->acomp);
		return -ENOMEM;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * if the backend of acomp is async zip, crypto_req_done() will wake up
	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
	 * won't be called and crypto_wait_req() will return without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);

	return 0;
}

static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
	}

	return 0;
}

/*********************************
* pool functions
**********************************/

static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		/* all zpools share the same type */
		if (strcmp(zpool_get_type(pool->zpools[0]), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

/*
 * If the entry is still valid in the tree, drop the initial ref and remove it
 * from the tree. This function must be called with an additional ref held,
 * otherwise it may race with another invalidation freeing the entry.
 */
static void zswap_invalidate_entry(struct zswap_tree *tree,
				   struct zswap_entry *entry)
{
	if (zswap_rb_erase(&tree->rbroot, entry))
		zswap_entry_put(tree, entry);
}

static int zswap_reclaim_entry(struct zswap_pool *pool)
{
	struct zswap_entry *entry;
	struct zswap_tree *tree;
	pgoff_t swpoffset;
	int ret;

	/* Get an entry off the LRU */
	spin_lock(&pool->lru_lock);
	if (list_empty(&pool->lru)) {
		spin_unlock(&pool->lru_lock);
		return -EINVAL;
	}
	entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
	list_del_init(&entry->lru);
	/*
	 * Once the lru lock is dropped, the entry might get freed. The
	 * swpoffset is copied to the stack, and entry isn't deref'd again
	 * until the entry is verified to still be alive in the tree.
	 */
	swpoffset = swp_offset(entry->swpentry);
	tree = zswap_trees[swp_type(entry->swpentry)];
	spin_unlock(&pool->lru_lock);

	/* Check for invalidate() race */
	spin_lock(&tree->lock);
	if (entry != zswap_rb_search(&tree->rbroot, swpoffset)) {
		ret = -EAGAIN;
		goto unlock;
	}
	/* Hold a reference to prevent a free during writeback */
	zswap_entry_get(entry);
	spin_unlock(&tree->lock);

	ret = zswap_writeback_entry(entry, tree);

	spin_lock(&tree->lock);
	if (ret) {
		/* Writeback failed, put entry back on LRU */
		spin_lock(&pool->lru_lock);
		list_move(&entry->lru, &pool->lru);
		spin_unlock(&pool->lru_lock);
		goto put_unlock;
	}

	/*
	 * Writeback started successfully, the page now belongs to the
	 * swapcache. Drop the entry from zswap - unless invalidate already
	 * took it out while we had the tree->lock released for IO.
	 */
	zswap_invalidate_entry(tree, entry);

put_unlock:
	/* Drop local reference */
	zswap_entry_put(tree, entry);
unlock:
	spin_unlock(&tree->lock);
	return ret ? -EAGAIN : 0;
}

static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);
	int ret, failures = 0;

	do {
		ret = zswap_reclaim_entry(pool);
		if (ret) {
			zswap_reject_reclaim_fail++;
			if (ret != -EAGAIN)
				break;
			if (++failures == MAX_RECLAIM_RETRIES)
				break;
		}
		cond_resched();
	} while (!zswap_can_accept());
	zswap_pool_put(pool);
}

static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	int i;
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either is unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
		/* unique name for each pool specifically required by zsmalloc */
		snprintf(name, 38, "zswap%x",
			 atomic_inc_return(&zswap_pools_count));

		pool->zpools[i] = zpool_create_pool(type, name, gfp);
		if (!pool->zpools[i]) {
			pr_err("%s zpool not available\n", type);
			goto error;
		}
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;
	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	INIT_LIST_HEAD(&pool->lru);
	spin_lock_init(&pool->lru_lock);
	INIT_WORK(&pool->shrink_work, shrink_worker);

	zswap_pool_debug("created", pool);

	return pool;

error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	while (i--)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
	return NULL;
}

static struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

static void zswap_pool_destroy(struct zswap_pool *pool)
{
	int i;

	zswap_pool_debug("destroying", pool);

	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
		zpool_destroy_pool(pool->zpools[i]);
	kfree(pool);
}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}
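
/*
 * Pool teardown is deferred: __zswap_pool_empty() (the kref release
 * function below) unlinks the pool from zswap_pools under
 * zswap_pools_lock and schedules this work item, which waits for an
 * RCU grace period before destroying the pool so that lockless
 * readers such as zswap_pool_current_get() are done with it.
 */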
static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

/*********************************
* param callbacks
**********************************/

static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
{
	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return false;
	return true;
}

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret = 0;
	bool new_pool = false;

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		/* if this is load-time (pre-init) param setting,
		 * don't create a pool; that's done during init.
		 */
		ret = param_set_charp(s, kp);
		break;
	case ZSWAP_INIT_SUCCEED:
		new_pool = zswap_pool_changed(s, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't set param, initialization failed\n");
		ret = -ENODEV;
	}
	mutex_unlock(&zswap_init_lock);

	/* no need to create a new pool, return directly */
	if (!new_pool)
		return ret;

	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock(&zswap_pools_lock);

	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock(&zswap_pools_lock);

	if (!ret) {
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		zswap_pool_put(put_pool);

	return ret;
}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	int ret = -ENODEV;

	/* if this is load-time (pre-init) param setting, only set param. */
	if (system_state != SYSTEM_RUNNING)
		return param_set_bool(val, kp);

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		if (zswap_setup())
			break;
		fallthrough;
	case ZSWAP_INIT_SUCCEED:
		if (!zswap_has_pool)
			pr_err("can't enable, no pool configured\n");
		else
			ret = param_set_bool(val, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't enable, initialization failed\n");
	}
	mutex_unlock(&zswap_init_lock);

	return ret;
}

/*********************************
* writeback code
**********************************/
/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device. We are basically resuming the same swap
 * writeback path that was intercepted with the zswap_store()
 * in the first place. After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zswap_entry *entry,
				 struct zswap_tree *tree)
{
	swp_entry_t swpentry = entry->swpentry;
	struct page *page;
	struct mempolicy *mpol;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	struct zpool *pool = zswap_find_zpool(entry);
	bool page_was_allocated;
	u8 *src, *tmp = NULL;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	if (!zpool_can_sleep_mapped(pool)) {
		tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!tmp)
			return -ENOMEM;
	}

	/* try to allocate swap cache page */
	mpol = get_task_policy(current);
	page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
				NO_INTERLEAVE_INDEX, &page_was_allocated);
	if (!page) {
		ret = -ENOMEM;
		goto fail;
	}

	/* Found an existing page, we raced with load/swapin */
	if (!page_was_allocated) {
		put_page(page);
		ret = -EEXIST;
		goto fail;
	}

	/*
	 * Page is locked, and the swapcache is now secured against
	 * concurrent swapping to and from the slot. Verify that the
	 * swap entry hasn't been invalidated and recycled behind our
	 * backs (our zswap_entry reference doesn't prevent that), to
	 * avoid overwriting a new swap page with old compressed data.
	 */
	spin_lock(&tree->lock);
	if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
		spin_unlock(&tree->lock);
		delete_from_swap_cache(page_folio(page));
		ret = -ENOMEM;
		goto fail;
	}
	spin_unlock(&tree->lock);

	/* decompress */
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	dlen = PAGE_SIZE;

	src = zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO);
	if (!zpool_can_sleep_mapped(pool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(pool, entry->handle);
	}

	mutex_lock(acomp_ctx->mutex);
	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;
	mutex_unlock(acomp_ctx->mutex);

	if (!zpool_can_sleep_mapped(pool))
		kfree(tmp);
	else
		zpool_unmap_handle(pool, entry->handle);

	BUG_ON(ret);
	BUG_ON(dlen != PAGE_SIZE);

	/* page is up to date */
	SetPageUptodate(page);

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc);
	put_page(page);
	zswap_written_back_pages++;

	return ret;

fail:
	if (!zpool_can_sleep_mapped(pool))
		kfree(tmp);

	/*
	 * If we get here because the page is already in swapcache, a
	 * load may be happening concurrently. It is safe and okay to
	 * not free the entry. It is also okay to return !0.
	 */
	return ret;
}
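
/*
 * A page is "same-filled" when every machine word in it equals the first
 * word (e.g. an all-zero page). Such pages are stored as just that value
 * in entry->value with length == 0, so they consume no zpool space and
 * skip compression entirely; zswap_fill_page() reconstructs them on load.
 */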
static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return 0;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return 0;
	}

	*value = val;

	return 1;
}

static void zswap_fill_page(void *ptr, unsigned long value)
{
	unsigned long *page;

	page = (unsigned long *)ptr;
	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}

bool zswap_store(struct folio *folio)
{
	swp_entry_t swp = folio->swap;
	int type = swp_type(swp);
	pgoff_t offset = swp_offset(swp);
	struct page *page = &folio->page;
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	struct obj_cgroup *objcg = NULL;
	struct zswap_pool *pool;
	struct zpool *zpool;
	unsigned int dlen = PAGE_SIZE;
	unsigned long handle, value;
	char *buf;
	u8 *src, *dst;
	gfp_t gfp;
	int ret;

	VM_WARN_ON_ONCE(!folio_test_locked(folio));
	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));

	/* Large folios aren't supported */
	if (folio_test_large(folio))
		return false;

	if (!zswap_enabled || !tree)
		return false;

	/*
	 * If this is a duplicate, it must be removed before attempting to store
	 * it, otherwise, if the store fails the old page won't be removed from
	 * the tree, and it might be written back overriding the new data.
	 */
	spin_lock(&tree->lock);
	dupentry = zswap_rb_search(&tree->rbroot, offset);
	if (dupentry) {
		zswap_duplicate_entry++;
		zswap_invalidate_entry(tree, dupentry);
	}
	spin_unlock(&tree->lock);

	/*
	 * XXX: zswap reclaim does not work with cgroups yet. Without a
	 * cgroup-aware entry LRU, we will push out entries system-wide based on
	 * local cgroup limits.
	 */
	objcg = get_obj_cgroup_from_folio(folio);
	if (objcg && !obj_cgroup_may_zswap(objcg))
		goto reject;

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		zswap_pool_reached_full = true;
		goto shrink;
	}

	if (zswap_pool_reached_full) {
		if (!zswap_can_accept())
			goto shrink;
		else
			zswap_pool_reached_full = false;
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		goto reject;
	}

	if (zswap_same_filled_pages_enabled) {
		src = kmap_atomic(page);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_atomic(src);
			entry->swpentry = swp_entry(type, offset);
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_atomic(src);
	}

	if (!zswap_non_same_filled_pages_enabled)
		goto freepage;

	/* if entry is successfully added, it keeps the reference */
	entry->pool = zswap_pool_current_get();
	if (!entry->pool)
		goto freepage;

	/* compress */
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(acomp_ctx->mutex);

	dst = acomp_ctx->dstmem;
	sg_init_table(&input, 1);
	sg_set_page(&input, page, PAGE_SIZE, 0);

	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
	/*
	 * It may look a little silly that we send an asynchronous request and
	 * then wait for its completion synchronously, which makes the whole
	 * process effectively synchronous. In theory, acomp lets users submit
	 * multiple requests on one acomp instance and have them completed
	 * concurrently, but zswap stores and loads one page at a time, so a
	 * single thread doing zswap has no second page to submit before the
	 * first one is done. Different threads running on different CPUs use
	 * different acomp instances, however, so multiple threads can still
	 * do (de)compression in parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;

	if (ret) {
		zswap_reject_compress_fail++;
		goto put_dstmem;
	}

	/* store */
	zpool = zswap_find_zpool(entry);
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(zpool, dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto put_dstmem;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto put_dstmem;
	}
	buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, dst, dlen);
	zpool_unmap_handle(zpool, handle);
	mutex_unlock(acomp_ctx->mutex);

	/* populate entry */
	entry->swpentry = swp_entry(type, offset);
	entry->handle = handle;
	entry->length = dlen;

insert_entry:
	entry->objcg = objcg;
	if (objcg) {
		obj_cgroup_charge_zswap(objcg, entry->length);
		/* Account before objcg ref is moved to tree */
		count_objcg_event(objcg, ZSWPOUT);
	}

	/* map */
	spin_lock(&tree->lock);
	/*
	 * A duplicate entry should have been removed at the beginning of this
	 * function. Since the swap entry should be pinned, if a duplicate is
	 * found again here it means that something went wrong in the swap
	 * cache.
	 */
	while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
		WARN_ON(1);
		zswap_duplicate_entry++;
		zswap_invalidate_entry(tree, dupentry);
	}
	if (entry->length) {
		spin_lock(&entry->pool->lru_lock);
		list_add(&entry->lru, &entry->pool->lru);
		spin_unlock(&entry->pool->lru_lock);
	}
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_update_total_size();
	count_vm_event(ZSWPOUT);

	return true;

put_dstmem:
	mutex_unlock(acomp_ctx->mutex);
	zswap_pool_put(entry->pool);
freepage:
	zswap_entry_cache_free(entry);
reject:
	if (objcg)
		obj_cgroup_put(objcg);
	return false;

shrink:
	pool = zswap_pool_last_get();
	if (pool && !queue_work(shrink_wq, &pool->shrink_work))
		zswap_pool_put(pool);
	goto reject;
}

bool zswap_load(struct folio *folio)
{
	swp_entry_t swp = folio->swap;
	int type = swp_type(swp);
	pgoff_t offset = swp_offset(swp);
	struct page *page = &folio->page;
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src, *dst, *tmp;
	struct zpool *zpool;
	unsigned int dlen;
	bool ret;

	VM_WARN_ON_ONCE(!folio_test_locked(folio));

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		spin_unlock(&tree->lock);
		return false;
	}
	spin_unlock(&tree->lock);

	if (!entry->length) {
		dst = kmap_atomic(page);
		zswap_fill_page(dst, entry->value);
		kunmap_atomic(dst);
		ret = true;
		goto stats;
	}

	zpool = zswap_find_zpool(entry);
	if (!zpool_can_sleep_mapped(zpool)) {
		tmp = kmalloc(entry->length, GFP_KERNEL);
		if (!tmp) {
			ret = false;
			goto freeentry;
		}
	}

	/* decompress */
	dlen = PAGE_SIZE;
	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);

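	/*
	 * Some zpool backends cannot keep a handle mapped across a sleeping
	 * context, and both the acomp mutex and the decompression below may
	 * sleep. For those backends, copy the compressed data into the
	 * temporary buffer allocated above and drop the mapping first.
	 */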
	if (!zpool_can_sleep_mapped(zpool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(zpool, entry->handle);
	}

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(acomp_ctx->mutex);
	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
	if (crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait))
		WARN_ON(1);
	mutex_unlock(acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(zpool))
		zpool_unmap_handle(zpool, entry->handle);
	else
		kfree(tmp);

	ret = true;
stats:
	count_vm_event(ZSWPIN);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPIN);
freeentry:
	spin_lock(&tree->lock);
	if (ret && zswap_exclusive_loads_enabled) {
		zswap_invalidate_entry(tree, entry);
		folio_mark_dirty(folio);
	} else if (entry->length) {
		spin_lock(&entry->pool->lru_lock);
		list_move(&entry->lru, &entry->pool->lru);
		spin_unlock(&entry->pool->lru_lock);
	}
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;
}

void zswap_invalidate(int type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}
	zswap_invalidate_entry(tree, entry);
	spin_unlock(&tree->lock);
}

void zswap_swapon(int type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

void zswap_swapoff(int type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);

	debugfs_create_u64("pool_limit_hit", 0444,
			   zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_fail);
	debugfs_create_u64("reject_compress_poor", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", 0444,
			   zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", 0444,
			   zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", 0444,
			   zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", 0444,
				zswap_debugfs_root, &zswap_stored_pages);
	debugfs_create_atomic_t("same_filled_pages", 0444,
				zswap_debugfs_root, &zswap_same_filled_pages);

	return 0;
}
#else
static int zswap_debugfs_init(void)
{
	return 0;
}
#endif

/*********************************
* module init and exit
**********************************/
static int zswap_setup(void)
{
	struct zswap_pool *pool;
	int ret;

	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	if (!zswap_entry_cache) {
		pr_err("entry cache creation failed\n");
		goto cache_fail;
	}

	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
				zswap_dstmem_prepare, zswap_dstmem_dead);
	if (ret) {
		pr_err("dstmem alloc failed\n");
		goto dstmem_fail;
	}

	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
				      "mm/zswap_pool:prepare",
				      zswap_cpu_comp_prepare,
				      zswap_cpu_comp_dead);
	if (ret)
		goto hp_fail;

	pool = __zswap_pool_create_fallback();
	if (pool) {
		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
			zpool_get_type(pool->zpools[0]));
		list_add(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else {
		pr_err("pool creation failed\n");
		zswap_enabled = false;
	}

	shrink_wq = create_workqueue("zswap-shrink");
	if (!shrink_wq)
		goto fallback_fail;

	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	zswap_init_state = ZSWAP_INIT_SUCCEED;
	return 0;

fallback_fail:
	if (pool)
		zswap_pool_destroy(pool);
hp_fail:
	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
	kmem_cache_destroy(zswap_entry_cache);
cache_fail:
	/* if built-in, we aren't unloaded on failure; don't allow use */
	zswap_init_state = ZSWAP_INIT_FAILED;
	zswap_enabled = false;
	return -ENOMEM;
}

static int __init zswap_init(void)
{
	if (!zswap_enabled)
		return 0;
	return zswap_setup();
}
/* must be late so crypto has time to come up */
late_initcall(zswap_init);

MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Compressed cache for swap pages");