Lines Matching +full:pool +full:-long (zswap.c - zswap driver file)

1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * zswap.c - zswap driver file
7 * RAM-based memory pool. This can result in a significant I/O reduction on
31 #include <linux/page-flags.h>
57 /* Pool limit was hit (see zswap_max_pool_percent) */
59 /* Pages written back when pool limit was reached */
61 /* Store failed due to a reclaim failure after pool limit was reached */
76 /* Pool limit was hit, we need to calm down */
110 /* The maximum percentage of memory that the compressed pool can occupy */
115 static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
119 /* Enable/disable memory pressure-based shrinker. */
148 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
178 * swpentry - associated swap entry, the offset indexes into the red-black tree
179 * length - the length in bytes of the compressed page data. Needed during
181 * referenced - true if the entry recently entered the zswap pool. Unset by the
185 * pool - the zswap_pool the entry's data is in
186 * handle - zsmalloc allocation handle that stores the compressed page data
187 * objcg - the obj_cgroup that the compressed memory is charged to
188 * lru - handle to the pool's lru used to evict pages.
194 struct zswap_pool *pool; member
195 unsigned long handle;
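Taken together, the field documentation above and the two matched member lines suggest roughly this shape for struct zswap_entry. A sketch only: field order and the exact types of members not shown in the listing are assumptions.

	struct zswap_entry {
		swp_entry_t swpentry;		/* the offset indexes into the red-black tree */
		unsigned int length;		/* compressed length in bytes */
		bool referenced;		/* second-chance bit, unset by the LRU shrinker */
		struct zswap_pool *pool;	/* pool the data lives in (line 194 above) */
		unsigned long handle;		/* zsmalloc allocation handle (line 195 above) */
		struct obj_cgroup *objcg;	/* memcg the compressed memory is charged to */
		struct list_head lru;		/* handle on the pool's LRU */
	};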
203 /* RCU-protected iteration */
207 /* pool counter to provide unique names to zsmalloc */
221 /* init completed, but couldn't create the initial pool */
238 pr_debug("%s pool %s\n", msg, (p)->tfm_name)
241 * pool functions
247 struct zswap_pool *pool; in zswap_pool_create() local
254 pool = kzalloc(sizeof(*pool), GFP_KERNEL); in zswap_pool_create()
255 if (!pool) in zswap_pool_create()
258 /* unique name for each pool specifically required by zsmalloc */ in zswap_pool_create()
260 pool->zs_pool = zs_create_pool(name); in zswap_pool_create()
261 if (!pool->zs_pool) in zswap_pool_create()
264 strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); in zswap_pool_create()
266 pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx); in zswap_pool_create()
267 if (!pool->acomp_ctx) { in zswap_pool_create()
273 mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex); in zswap_pool_create()
276 &pool->node); in zswap_pool_create()
280 /* being the current pool takes 1 ref; this func expects the in zswap_pool_create()
281 * caller to always add the new pool as the current pool in zswap_pool_create()
283 ret = percpu_ref_init(&pool->ref, __zswap_pool_empty, in zswap_pool_create()
287 INIT_LIST_HEAD(&pool->list); in zswap_pool_create()
289 zswap_pool_debug("created", pool); in zswap_pool_create()
291 return pool; in zswap_pool_create()
294 cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); in zswap_pool_create()
296 if (pool->acomp_ctx) in zswap_pool_create()
297 free_percpu(pool->acomp_ctx); in zswap_pool_create()
298 if (pool->zs_pool) in zswap_pool_create()
299 zs_destroy_pool(pool->zs_pool); in zswap_pool_create()
300 kfree(pool); in zswap_pool_create()
323 static void zswap_pool_destroy(struct zswap_pool *pool) in zswap_pool_destroy() argument
325 zswap_pool_debug("destroying", pool); in zswap_pool_destroy()
327 cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); in zswap_pool_destroy()
328 free_percpu(pool->acomp_ctx); in zswap_pool_destroy()
330 zs_destroy_pool(pool->zs_pool); in zswap_pool_destroy()
331 kfree(pool); in zswap_pool_destroy()
336 struct zswap_pool *pool = container_of(work, typeof(*pool), in __zswap_pool_release() local
342 WARN_ON(!percpu_ref_is_zero(&pool->ref)); in __zswap_pool_release()
343 percpu_ref_exit(&pool->ref); in __zswap_pool_release()
345 /* pool is now off zswap_pools list and has no references. */ in __zswap_pool_release()
346 zswap_pool_destroy(pool); in __zswap_pool_release()
353 struct zswap_pool *pool; in __zswap_pool_empty() local
355 pool = container_of(ref, typeof(*pool), ref); in __zswap_pool_empty()
359 WARN_ON(pool == zswap_pool_current()); in __zswap_pool_empty()
361 list_del_rcu(&pool->list); in __zswap_pool_empty()
363 INIT_WORK(&pool->release_work, __zswap_pool_release); in __zswap_pool_empty()
364 schedule_work(&pool->release_work); in __zswap_pool_empty()
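The two callbacks above form the pool teardown chain. A hedged outline, assuming the percpu_ref zero callback runs in atomic context (hence the deferral to a workqueue):

	/*
	 * Pool release chain (sketch):
	 *
	 *   percpu_ref drops to zero
	 *     -> __zswap_pool_empty()            atomic context, no sleeping:
	 *          list_del_rcu(&pool->list);    unlink from zswap_pools
	 *          schedule_work(&pool->release_work);
	 *     -> __zswap_pool_release()          process context:
	 *          percpu_ref_exit(&pool->ref);
	 *          zswap_pool_destroy(pool);     free acomp_ctx, zs_pool, pool
	 */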
369 static int __must_check zswap_pool_tryget(struct zswap_pool *pool) in zswap_pool_tryget() argument
371 if (!pool) in zswap_pool_tryget()
374 return percpu_ref_tryget(&pool->ref); in zswap_pool_tryget()
378 static void zswap_pool_get(struct zswap_pool *pool) in zswap_pool_get() argument
380 percpu_ref_get(&pool->ref); in zswap_pool_get()
383 static void zswap_pool_put(struct zswap_pool *pool) in zswap_pool_put() argument
385 percpu_ref_put(&pool->ref); in zswap_pool_put()
390 struct zswap_pool *pool; in __zswap_pool_current() local
392 pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list); in __zswap_pool_current()
393 WARN_ONCE(!pool && zswap_has_pool, in __zswap_pool_current()
394 "%s: no page storage pool!\n", __func__); in __zswap_pool_current()
396 return pool; in __zswap_pool_current()
408 struct zswap_pool *pool; in zswap_pool_current_get() local
412 pool = __zswap_pool_current(); in zswap_pool_current_get()
413 if (!zswap_pool_tryget(pool)) in zswap_pool_current_get()
414 pool = NULL; in zswap_pool_current_get()
418 return pool; in zswap_pool_current_get()
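A minimal caller sketch of the tryget/put pattern these helpers implement, mirroring what zswap_store() does at lines 1524 and 1552 of this listing:

	struct zswap_pool *pool;

	pool = zswap_pool_current_get();	/* percpu_ref_tryget under RCU */
	if (!pool)
		return false;			/* no pool, or it is being torn down */
	/* ... pool->zs_pool and pool->acomp_ctx are safe to use here ... */
	zswap_pool_put(pool);			/* drop the reference when done */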
421 /* type and compressor must be null-terminated */
424 struct zswap_pool *pool; in zswap_pool_find_get() local
428 list_for_each_entry_rcu(pool, &zswap_pools, list) { in zswap_pool_find_get()
429 if (strcmp(pool->tfm_name, compressor)) in zswap_pool_find_get()
432 if (!zswap_pool_tryget(pool)) in zswap_pool_find_get()
434 return pool; in zswap_pool_find_get()
440 static unsigned long zswap_max_pages(void) in zswap_max_pages()
445 static unsigned long zswap_accept_thr_pages(void) in zswap_accept_thr_pages()
450 unsigned long zswap_total_pages(void) in zswap_total_pages()
452 struct zswap_pool *pool; in zswap_total_pages() local
453 unsigned long total = 0; in zswap_total_pages()
456 list_for_each_entry_rcu(pool, &zswap_pools, list) in zswap_total_pages()
457 total += zs_get_total_pages(pool->zs_pool); in zswap_total_pages()
465 unsigned long cur_pages = zswap_total_pages(); in zswap_check_limits()
466 unsigned long max_pages = zswap_max_pages(); in zswap_check_limits()
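These helpers tie the limit checks back to the percentage knobs near the top of the file. A plausible sketch of the derivations, assuming total RAM as the base (the exact expressions are assumptions consistent with the knob names):

	static unsigned long zswap_max_pages(void)
	{
		/* zswap_max_pool_percent percent of total RAM, in pages */
		return totalram_pages() * zswap_max_pool_percent / 100;
	}

	static unsigned long zswap_accept_thr_pages(void)
	{
		/* hysteresis: accept stores again below this fraction of max */
		return zswap_max_pages() * zswap_accept_thr_percent / 100;
	}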
484 struct zswap_pool *pool, *put_pool = NULL; in zswap_compressor_param_set() local
496 if (!zswap_has_pool || strcmp(s, *(char **)kp->arg)) in zswap_compressor_param_set()
501 ret = -ENODEV; in zswap_compressor_param_set()
510 return -ENOENT; in zswap_compressor_param_set()
515 pool = zswap_pool_find_get(s); in zswap_compressor_param_set()
516 if (pool) { in zswap_compressor_param_set()
517 zswap_pool_debug("using existing", pool); in zswap_compressor_param_set()
518 WARN_ON(pool == zswap_pool_current()); in zswap_compressor_param_set()
519 list_del_rcu(&pool->list); in zswap_compressor_param_set()
524 if (!pool) in zswap_compressor_param_set()
525 pool = zswap_pool_create(s); in zswap_compressor_param_set()
529 * when the pool was decommissioned and switch it again in zswap_compressor_param_set()
532 percpu_ref_resurrect(&pool->ref); in zswap_compressor_param_set()
535 zswap_pool_put(pool); in zswap_compressor_param_set()
538 if (pool) in zswap_compressor_param_set()
541 ret = -EINVAL; in zswap_compressor_param_set()
547 list_add_rcu(&pool->list, &zswap_pools); in zswap_compressor_param_set()
549 } else if (pool) { in zswap_compressor_param_set()
551 * Add the possibly pre-existing pool to the end of the pools in zswap_compressor_param_set()
555 list_add_tail_rcu(&pool->list, &zswap_pools); in zswap_compressor_param_set()
556 put_pool = pool; in zswap_compressor_param_set()
562 * Drop the ref from either the old current pool, in zswap_compressor_param_set()
563 * or the new pool we failed to add in zswap_compressor_param_set()
566 percpu_ref_kill(&put_pool->ref); in zswap_compressor_param_set()
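In outline, the matched fragments of zswap_compressor_param_set() describe this switch protocol (a reconstruction from the fragments, not the full function):

	/*
	 * 1. zswap_pool_find_get(s): reuse an existing pool for the new
	 *    compressor; if found, list_del_rcu() it and resurrect the
	 *    ref that percpu_ref_kill() dropped when it was retired.
	 * 2. Otherwise zswap_pool_create(s) builds a fresh pool.
	 * 3. On success, list_add_rcu() at the head of zswap_pools makes
	 *    it the current pool; a displaced pre-existing pool is
	 *    re-added at the tail and becomes put_pool.
	 * 4. percpu_ref_kill(&put_pool->ref) drops the initial reference;
	 *    the old pool is destroyed once in-flight users drain.
	 */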
574 int ret = -ENODEV; in zswap_enabled_param_set()
576 /* if this is load-time (pre-init) param setting, only set param. */ in zswap_enabled_param_set()
588 pr_err("can't enable, no pool configured\n"); in zswap_enabled_param_set()
608 return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL; in mem_cgroup_from_entry()
641 list_lru_add(list_lru, &entry->lru, nid, memcg); in zswap_lru_add()
653 list_lru_del(list_lru, &entry->lru, nid, memcg); in zswap_lru_del()
659 atomic_long_set(&lruvec->zswap_lruvec_state.nr_disk_swapins, 0); in zswap_lruvec_state_init()
668 atomic_long_inc(&lruvec->zswap_lruvec_state.nr_disk_swapins); in zswap_folio_swapin()
720 zs_free(entry->pool->zs_pool, entry->handle); in zswap_entry_free()
721 zswap_pool_put(entry->pool); in zswap_entry_free()
722 if (entry->objcg) { in zswap_entry_free()
723 obj_cgroup_uncharge_zswap(entry->objcg, entry->length); in zswap_entry_free()
724 obj_cgroup_put(entry->objcg); in zswap_entry_free()
726 if (entry->length == PAGE_SIZE) in zswap_entry_free()
737 struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); in zswap_cpu_comp_prepare() local
738 struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); in zswap_cpu_comp_prepare()
746 ret = -ENOMEM; in zswap_cpu_comp_prepare()
750 acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); in zswap_cpu_comp_prepare()
753 pool->tfm_name, PTR_ERR(acomp)); in zswap_cpu_comp_prepare()
761 pool->tfm_name); in zswap_cpu_comp_prepare()
762 ret = -ENOMEM; in zswap_cpu_comp_prepare()
771 mutex_lock(&acomp_ctx->mutex); in zswap_cpu_comp_prepare()
772 crypto_init_wait(&acomp_ctx->wait); in zswap_cpu_comp_prepare()
780 crypto_req_done, &acomp_ctx->wait); in zswap_cpu_comp_prepare()
782 acomp_ctx->buffer = buffer; in zswap_cpu_comp_prepare()
783 acomp_ctx->acomp = acomp; in zswap_cpu_comp_prepare()
784 acomp_ctx->is_sleepable = acomp_is_async(acomp); in zswap_cpu_comp_prepare()
785 acomp_ctx->req = req; in zswap_cpu_comp_prepare()
786 mutex_unlock(&acomp_ctx->mutex); in zswap_cpu_comp_prepare()
798 struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); in zswap_cpu_comp_dead() local
799 struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); in zswap_cpu_comp_dead()
807 mutex_lock(&acomp_ctx->mutex); in zswap_cpu_comp_dead()
808 req = acomp_ctx->req; in zswap_cpu_comp_dead()
809 acomp = acomp_ctx->acomp; in zswap_cpu_comp_dead()
810 buffer = acomp_ctx->buffer; in zswap_cpu_comp_dead()
811 acomp_ctx->req = NULL; in zswap_cpu_comp_dead()
812 acomp_ctx->acomp = NULL; in zswap_cpu_comp_dead()
813 acomp_ctx->buffer = NULL; in zswap_cpu_comp_dead()
814 mutex_unlock(&acomp_ctx->mutex); in zswap_cpu_comp_dead()
829 static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool) in acomp_ctx_get_cpu_lock() argument
834 acomp_ctx = raw_cpu_ptr(pool->acomp_ctx); in acomp_ctx_get_cpu_lock()
835 mutex_lock(&acomp_ctx->mutex); in acomp_ctx_get_cpu_lock()
836 if (likely(acomp_ctx->req)) in acomp_ctx_get_cpu_lock()
840 * getting the per-CPU ctx but before the mutex was acquired. If in acomp_ctx_get_cpu_lock()
842 * already freed ctx->req (among other things) and set it to in acomp_ctx_get_cpu_lock()
845 mutex_unlock(&acomp_ctx->mutex); in acomp_ctx_get_cpu_lock()
851 mutex_unlock(&acomp_ctx->mutex); in acomp_ctx_put_unlock()
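The likely(acomp_ctx->req) test guards a CPU-hotplug race: zswap_cpu_comp_dead() frees the per-CPU resources and clears ctx->req under the same mutex. A hedged sketch of the full acquire loop (the retry structure is an assumption beyond the matched lines):

	static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
	{
		struct crypto_acomp_ctx *acomp_ctx;

		for (;;) {
			acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
			mutex_lock(&acomp_ctx->mutex);
			if (likely(acomp_ctx->req))
				return acomp_ctx;
			/*
			 * The CPU went offline between the raw_cpu_ptr()
			 * lookup and taking the mutex; the ctx was freed.
			 * Unlock and retry, landing on a live CPU's ctx.
			 */
			mutex_unlock(&acomp_ctx->mutex);
		}
	}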
855 struct zswap_pool *pool) in zswap_compress() argument
861 unsigned long handle; in zswap_compress()
866 acomp_ctx = acomp_ctx_get_cpu_lock(pool); in zswap_compress()
867 dst = acomp_ctx->buffer; in zswap_compress()
872 acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); in zswap_compress()
886 comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); in zswap_compress()
887 dlen = acomp_ctx->req->dlen; in zswap_compress()
900 comp_ret = comp_ret ? comp_ret : -EINVAL; in zswap_compress()
910 handle = zs_malloc(pool->zs_pool, dlen, gfp, page_to_nid(page)); in zswap_compress()
916 zs_obj_write(pool->zs_pool, handle, dst, dlen); in zswap_compress()
917 entry->handle = handle; in zswap_compress()
918 entry->length = dlen; in zswap_compress()
923 if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC) in zswap_compress()
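The -ENOSPC test at line 923 is consistent with failure accounting for poorly compressible pages, alongside the uncompressed fallback implied by the entry->length == PAGE_SIZE checks at lines 726 and 946. A sketch with assumed counter names:

	/* tail of zswap_compress() (sketch; counter names are assumptions) */
	if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC)
		zswap_reject_compress_poor++;	/* page compressed poorly */
	else if (comp_ret)
		zswap_reject_compress_fail++;	/* compressor error */
	else if (alloc_ret)
		zswap_reject_alloc_fail++;	/* zs_malloc() failed */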
936 struct zswap_pool *pool = entry->pool; in zswap_decompress() local
942 acomp_ctx = acomp_ctx_get_cpu_lock(pool); in zswap_decompress()
943 obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer); in zswap_decompress()
946 if (entry->length == PAGE_SIZE) { in zswap_decompress()
947 memcpy_to_folio(folio, 0, obj, entry->length); in zswap_decompress()
953 * acomp_ctx->buffer is not used. However, sg_init_one() does not in zswap_decompress()
954 * handle highmem addresses, so copy the object to acomp_ctx->buffer. in zswap_decompress()
959 WARN_ON_ONCE(obj == acomp_ctx->buffer); in zswap_decompress()
960 memcpy(acomp_ctx->buffer, obj, entry->length); in zswap_decompress()
961 src = acomp_ctx->buffer; in zswap_decompress()
964 sg_init_one(&input, src, entry->length); in zswap_decompress()
967 acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); in zswap_decompress()
968 decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); in zswap_decompress()
969 dlen = acomp_ctx->req->dlen; in zswap_decompress()
972 zs_obj_read_end(pool->zs_pool, entry->handle, obj); in zswap_decompress()
979 pr_alert_ratelimited("Decompression error from zswap (%d:%lu %s %u->%d)\n", in zswap_decompress()
980 swp_type(entry->swpentry), in zswap_decompress()
981 swp_offset(entry->swpentry), in zswap_decompress()
982 entry->pool->tfm_name, entry->length, dlen); in zswap_decompress()
1015 return -EEXIST; in zswap_writeback_entry()
1022 return -ENOMEM; in zswap_writeback_entry()
1032 ret = -EEXIST; in zswap_writeback_entry()
1047 ret = -ENOMEM; in zswap_writeback_entry()
1052 ret = -EIO; in zswap_writeback_entry()
1059 if (entry->objcg) in zswap_writeback_entry()
1060 count_objcg_events(entry->objcg, ZSWPWB, 1); in zswap_writeback_entry()
1074 if (ret && ret != -EEXIST) { in zswap_writeback_entry()
1092 * adjusted by the pool activities - if the pool is dominated by new entries
1094 * the writeback rate will slow down. On the other hand, if the pool has a
1121 if (entry->referenced) { in shrink_memcg_cb()
1122 entry->referenced = false; in shrink_memcg_cb()
1147 * We don't do any trylocking; -ENOMEM comes closest, in shrink_memcg_cb()
1151 list_move_tail(item, &l->list); in shrink_memcg_cb()
1158 swpentry = entry->swpentry; in shrink_memcg_cb()
1164 spin_unlock(&l->lock); in shrink_memcg_cb()
1177 if (writeback_result == -EEXIST && encountered_page_in_swapcache) { in shrink_memcg_cb()
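Lines 1121-1122 and 1151 sketch a second-chance policy on the writeback LRU. A simplified outline of the walk callback's decision (return value per the list_lru walk API; the surrounding structure is an assumption):

	/* inside the list_lru walk callback (sketch) */
	if (entry->referenced) {
		entry->referenced = false;	/* consume the second chance */
		return LRU_ROTATE;		/* rotate instead of writing back */
	}
	swpentry = entry->swpentry;		/* snapshot before unlocking */
	spin_unlock(&l->lock);
	/* writeback runs unlocked; on failure the entry is rotated to the
	 * tail (the list_move_tail above) rather than retried under the lock */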
1188 static unsigned long zswap_shrinker_scan(struct shrinker *shrinker, in zswap_shrinker_scan()
1191 unsigned long shrink_ret; in zswap_shrinker_scan()
1195 !mem_cgroup_zswap_writeback_enabled(sc->memcg)) { in zswap_shrinker_scan()
1196 sc->nr_scanned = 0; in zswap_shrinker_scan()
1209 static unsigned long zswap_shrinker_count(struct shrinker *shrinker, in zswap_shrinker_count()
1212 struct mem_cgroup *memcg = sc->memcg; in zswap_shrinker_count()
1213 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid)); in zswap_shrinker_count()
1215 &lruvec->zswap_lruvec_state.nr_disk_swapins; in zswap_shrinker_count()
1216 unsigned long nr_backing, nr_stored, nr_freeable, nr_disk_swapins_cur, in zswap_shrinker_count()
1225 * rules (may_enter_fs()), which apply on a per-folio basis. in zswap_shrinker_count()
1227 if (!gfp_has_io_fs(sc->gfp_mask)) in zswap_shrinker_count()
1231 * For memcg, use the cgroup-wide ZSWAP stats since we don't in zswap_shrinker_count()
1232 * have them per-node and thus per-lruvec. Careful if memcg is in zswap_shrinker_count()
1233 * runtime-disabled: we can get sc->memcg == NULL, which is ok in zswap_shrinker_count()
1236 * Without memcg, use the zswap pool-wide metrics. in zswap_shrinker_count()
1264 nr_remain = nr_disk_swapins_cur - nr_freeable; in zswap_shrinker_count()
1268 nr_freeable -= nr_disk_swapins_cur - nr_remain; in zswap_shrinker_count()
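The nr_remain arithmetic discounts recent disk swapins from the freeable estimate, so pages that just came back from disk are protected from immediate writeback. A worked sketch of the two matched lines (illustrative values; the clamp to zero when swapins do not exceed freeable is an assumption):

	/*
	 * nr_freeable = 70, nr_disk_swapins_cur = 100:
	 *   nr_remain = 100 - 70 = 30 stays in the counter, and
	 *   nr_freeable -= 100 - 30  ->  0: everything is protected.
	 *
	 * nr_freeable = 100, nr_disk_swapins_cur = 30:
	 *   nr_remain = 0, and nr_freeable -= 30 - 0  ->  70 freeable.
	 */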
1286 shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap"); in zswap_alloc_shrinker()
1290 shrinker->scan_objects = zswap_shrinker_scan; in zswap_alloc_shrinker()
1291 shrinker->count_objects = zswap_shrinker_count; in zswap_alloc_shrinker()
1292 shrinker->batch = 0; in zswap_alloc_shrinker()
1293 shrinker->seeks = DEFAULT_SEEKS; in zswap_alloc_shrinker()
1302 return -ENOENT; in shrink_memcg()
1309 return -ENOENT; in shrink_memcg()
1312 unsigned long nr_to_walk = 1; in shrink_memcg()
1316 scanned += 1 - nr_to_walk; in shrink_memcg()
1320 return -ENOENT; in shrink_memcg()
1322 return shrunk ? 0 : -EAGAIN; in shrink_memcg()
1329 unsigned long thr; in shrink_worker()
1335 * Global reclaim will select cgroup in a round-robin fashion from all in shrink_worker()
1337 * writeback-disabled memcgs (memory.zswap.writeback=0) are not in shrink_worker()
1342 * - No writeback-candidate memcgs found in a memcg tree walk. in shrink_worker()
1343 * - Shrinking a writeback-candidate memcg failed. in shrink_worker()
1395 * There are no writeback-candidate pages in the memcg. in shrink_worker()
1396 * This is not an issue as long as we can find another memcg in shrink_worker()
1400 if (ret == -ENOENT) in shrink_worker()
1417 struct zswap_pool *pool) in zswap_store_page() argument
1429 if (!zswap_compress(page, entry, pool)) in zswap_store_page()
1438 WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err); in zswap_store_page()
1453 * no further possibility of failure. Grab refs to the pool and objcg, in zswap_store_page()
1458 zswap_pool_get(pool); in zswap_store_page()
1461 obj_cgroup_charge_zswap(objcg, entry->length); in zswap_store_page()
1464 if (entry->length == PAGE_SIZE) in zswap_store_page()
1477 entry->pool = pool; in zswap_store_page()
1478 entry->swpentry = page_swpentry; in zswap_store_page()
1479 entry->objcg = objcg; in zswap_store_page()
1480 entry->referenced = true; in zswap_store_page()
1481 if (entry->length) { in zswap_store_page()
1482 INIT_LIST_HEAD(&entry->lru); in zswap_store_page()
1489 zs_free(pool->zs_pool, entry->handle); in zswap_store_page()
1497 long nr_pages = folio_nr_pages(folio); in zswap_store()
1498 swp_entry_t swp = folio->swap; in zswap_store()
1501 struct zswap_pool *pool; in zswap_store() local
1503 long index; in zswap_store()
1524 pool = zswap_pool_current_get(); in zswap_store()
1525 if (!pool) in zswap_store()
1540 if (!zswap_store_page(page, objcg, pool)) in zswap_store()
1552 zswap_pool_put(pool); in zswap_store()
1582 * zswap_load() - load a folio from zswap
1585 * Return: 0 on success, with the folio unlocked and marked up-to-date, or one
1588 * -EIO: if the swapped out content was in zswap, but could not be loaded
1590 * NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page()
1593 * -EINVAL: if the swapped out content was in zswap, but the page belongs
1595 * but NOT marked up-to-date, so that an IO error is emitted (e.g.
1598 * -ENOENT: if the swapped out content was not in zswap. The folio remains
1603 swp_entry_t swp = folio->swap; in zswap_load()
1612 return -ENOENT; in zswap_load()
1621 return -EINVAL; in zswap_load()
1626 return -ENOENT; in zswap_load()
1630 return -EIO; in zswap_load()
1636 if (entry->objcg) in zswap_load()
1637 count_objcg_events(entry->objcg, ZSWPIN, 1); in zswap_load()
1643 * in-memory copies outweighs any benefits of caching the in zswap_load()
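A hypothetical caller-side sketch of how the documented return codes compose with the block-layer fallback (helper and variable names here are assumptions, not from the listing):

	ret = zswap_load(folio);
	if (ret != -ENOENT)
		return ret;	/* 0: done; -EIO/-EINVAL: surface an IO error */

	/* -ENOENT: never in zswap; read from the backing swap device */
	ret = swap_read_from_device(folio);	/* hypothetical helper */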
1675 int zswap_swapon(int type, unsigned long nr_pages) in zswap_swapon()
1684 return -ENOMEM; in zswap_swapon()
1745 return -ENODEV; in zswap_debugfs_init()
1787 struct zswap_pool *pool; in zswap_setup() local
1803 shrink_wq = alloc_workqueue("zswap-shrink", in zswap_setup()
1817 pool = __zswap_pool_create_fallback(); in zswap_setup()
1818 if (pool) { in zswap_setup()
1819 pr_info("loaded using pool %s\n", pool->tfm_name); in zswap_setup()
1820 list_add(&pool->list, &zswap_pools); in zswap_setup()
1824 pr_err("pool creation failed\n"); in zswap_setup()
1842 /* if built-in, we aren't unloaded on failure; don't allow use */ in zswap_setup()
1845 return -ENOMEM; in zswap_setup()