// SPDX-License-Identifier: GPL-2.0-only
/*
 * z3fold.c
 *
 * Author: Vitaly Wool <vitaly.wool@konsulko.com>
 * Copyright (C) 2016, Sony Mobile Communications Inc.
 *
 * This implementation is based on zbud written by Seth Jennings.
 *
 * z3fold is an special purpose allocator for storing compressed pages. It
 * can store up to three compressed pages per page which improves the
 * compression ratio of zbud while retaining its main concepts (e. g. always
 * storing an integral number of objects per page) and simplicity.
 * It still has simple and deterministic reclaim properties that make it
 * preferable to a higher density approach (with no requirement on integral
 * number of object per page) when reclaim is used.
 *
 * As in zbud, pages are divided into "chunks". The size of the chunks is
 * fixed at compile time and is determined by NCHUNKS_ORDER below.
 *
 * z3fold doesn't export any API and is meant to be used via zpool API.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/migrate.h>
#include <linux/node.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/fs.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
#include <linux/magic.h>

/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation.  It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
 * in the beginning of an allocated page are occupied by z3fold header, so
 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
 * which shows the max number of free chunks in z3fold page, also there will
 * be 63, or 62, respectively, freelists per pool.
569a001fc1SVitaly Wool */ 579a001fc1SVitaly Wool #define NCHUNKS_ORDER 6 589a001fc1SVitaly Wool 599a001fc1SVitaly Wool #define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) 609a001fc1SVitaly Wool #define CHUNK_SIZE (1 << CHUNK_SHIFT) 61ede93213SVitaly Wool #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) 62ede93213SVitaly Wool #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) 63ede93213SVitaly Wool #define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) 649a001fc1SVitaly Wool #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) 659a001fc1SVitaly Wool 66f201ebd8Szhong jiang #define BUDDY_MASK (0x3) 67ca0246bbSVitaly Wool #define BUDDY_SHIFT 2 687c2b8baaSVitaly Wool #define SLOTS_ALIGN (0x40) 697c2b8baaSVitaly Wool 707c2b8baaSVitaly Wool /***************** 717c2b8baaSVitaly Wool * Structures 727c2b8baaSVitaly Wool *****************/ 737c2b8baaSVitaly Wool struct z3fold_pool; 747c2b8baaSVitaly Wool struct z3fold_ops { 757c2b8baaSVitaly Wool int (*evict)(struct z3fold_pool *pool, unsigned long handle); 767c2b8baaSVitaly Wool }; 777c2b8baaSVitaly Wool 787c2b8baaSVitaly Wool enum buddy { 797c2b8baaSVitaly Wool HEADLESS = 0, 807c2b8baaSVitaly Wool FIRST, 817c2b8baaSVitaly Wool MIDDLE, 827c2b8baaSVitaly Wool LAST, 837c2b8baaSVitaly Wool BUDDIES_MAX = LAST 847c2b8baaSVitaly Wool }; 857c2b8baaSVitaly Wool 867c2b8baaSVitaly Wool struct z3fold_buddy_slots { 877c2b8baaSVitaly Wool /* 887c2b8baaSVitaly Wool * we are using BUDDY_MASK in handle_to_buddy etc. 
so there should 897c2b8baaSVitaly Wool * be enough slots to hold all possible variants 907c2b8baaSVitaly Wool */ 917c2b8baaSVitaly Wool unsigned long slot[BUDDY_MASK + 1]; 927c2b8baaSVitaly Wool unsigned long pool; /* back link + flags */ 937c2b8baaSVitaly Wool }; 947c2b8baaSVitaly Wool #define HANDLE_FLAG_MASK (0x03) 957c2b8baaSVitaly Wool 967c2b8baaSVitaly Wool /* 977c2b8baaSVitaly Wool * struct z3fold_header - z3fold page metadata occupying first chunks of each 987c2b8baaSVitaly Wool * z3fold page, except for HEADLESS pages 997c2b8baaSVitaly Wool * @buddy: links the z3fold page into the relevant list in the 1007c2b8baaSVitaly Wool * pool 1017c2b8baaSVitaly Wool * @page_lock: per-page lock 1027c2b8baaSVitaly Wool * @refcount: reference count for the z3fold page 1037c2b8baaSVitaly Wool * @work: work_struct for page layout optimization 1047c2b8baaSVitaly Wool * @slots: pointer to the structure holding buddy slots 105bb9a374dSVitaly Wool * @pool: pointer to the containing pool 1067c2b8baaSVitaly Wool * @cpu: CPU which this page "belongs" to 1077c2b8baaSVitaly Wool * @first_chunks: the size of the first buddy in chunks, 0 if free 1087c2b8baaSVitaly Wool * @middle_chunks: the size of the middle buddy in chunks, 0 if free 1097c2b8baaSVitaly Wool * @last_chunks: the size of the last buddy in chunks, 0 if free 1107c2b8baaSVitaly Wool * @first_num: the starting number (for the first handle) 1111f862989SVitaly Wool * @mapped_count: the number of objects currently mapped 1127c2b8baaSVitaly Wool */ 1137c2b8baaSVitaly Wool struct z3fold_header { 1147c2b8baaSVitaly Wool struct list_head buddy; 1157c2b8baaSVitaly Wool spinlock_t page_lock; 1167c2b8baaSVitaly Wool struct kref refcount; 1177c2b8baaSVitaly Wool struct work_struct work; 1187c2b8baaSVitaly Wool struct z3fold_buddy_slots *slots; 119bb9a374dSVitaly Wool struct z3fold_pool *pool; 1207c2b8baaSVitaly Wool short cpu; 1217c2b8baaSVitaly Wool unsigned short first_chunks; 1227c2b8baaSVitaly Wool unsigned short middle_chunks; 
1237c2b8baaSVitaly Wool unsigned short last_chunks; 1247c2b8baaSVitaly Wool unsigned short start_middle; 1257c2b8baaSVitaly Wool unsigned short first_num:2; 1261f862989SVitaly Wool unsigned short mapped_count:2; 1277c2b8baaSVitaly Wool }; 1289a001fc1SVitaly Wool 1299a001fc1SVitaly Wool /** 1309a001fc1SVitaly Wool * struct z3fold_pool - stores metadata for each z3fold pool 131d30561c5SVitaly Wool * @name: pool name 132d30561c5SVitaly Wool * @lock: protects pool unbuddied/lru lists 133d30561c5SVitaly Wool * @stale_lock: protects pool stale page list 134d30561c5SVitaly Wool * @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2- 135d30561c5SVitaly Wool * buddies; the list each z3fold page is added to depends on 136d30561c5SVitaly Wool * the size of its free region. 1379a001fc1SVitaly Wool * @lru: list tracking the z3fold pages in LRU order by most recently 1389a001fc1SVitaly Wool * added buddy. 139d30561c5SVitaly Wool * @stale: list of pages marked for freeing 1409a001fc1SVitaly Wool * @pages_nr: number of z3fold pages in the pool. 1417c2b8baaSVitaly Wool * @c_handle: cache for z3fold_buddy_slots allocation 1429a001fc1SVitaly Wool * @ops: pointer to a structure of user defined operations specified at 1439a001fc1SVitaly Wool * pool creation time. 144d30561c5SVitaly Wool * @compact_wq: workqueue for page layout background optimization 145d30561c5SVitaly Wool * @release_wq: workqueue for safe page release 146d30561c5SVitaly Wool * @work: work_struct for safe page release 1471f862989SVitaly Wool * @inode: inode for z3fold pseudo filesystem 1489a001fc1SVitaly Wool * 1499a001fc1SVitaly Wool * This structure is allocated at pool creation time and maintains metadata 1509a001fc1SVitaly Wool * pertaining to a particular z3fold pool. 
1519a001fc1SVitaly Wool */ 1529a001fc1SVitaly Wool struct z3fold_pool { 153d30561c5SVitaly Wool const char *name; 1549a001fc1SVitaly Wool spinlock_t lock; 155d30561c5SVitaly Wool spinlock_t stale_lock; 156d30561c5SVitaly Wool struct list_head *unbuddied; 1579a001fc1SVitaly Wool struct list_head lru; 158d30561c5SVitaly Wool struct list_head stale; 15912d59ae6SVitaly Wool atomic64_t pages_nr; 1607c2b8baaSVitaly Wool struct kmem_cache *c_handle; 1619a001fc1SVitaly Wool const struct z3fold_ops *ops; 1629a001fc1SVitaly Wool struct zpool *zpool; 1639a001fc1SVitaly Wool const struct zpool_ops *zpool_ops; 164d30561c5SVitaly Wool struct workqueue_struct *compact_wq; 165d30561c5SVitaly Wool struct workqueue_struct *release_wq; 166d30561c5SVitaly Wool struct work_struct work; 1671f862989SVitaly Wool struct inode *inode; 1689a001fc1SVitaly Wool }; 1699a001fc1SVitaly Wool 1709a001fc1SVitaly Wool /* 1719a001fc1SVitaly Wool * Internal z3fold page flags 1729a001fc1SVitaly Wool */ 1739a001fc1SVitaly Wool enum z3fold_page_flags { 1745a27aa82SVitaly Wool PAGE_HEADLESS = 0, 1759a001fc1SVitaly Wool MIDDLE_CHUNK_MAPPED, 176d30561c5SVitaly Wool NEEDS_COMPACTING, 1776098d7e1SVitaly Wool PAGE_STALE, 178ca0246bbSVitaly Wool PAGE_CLAIMED, /* by either reclaim or free */ 1799a001fc1SVitaly Wool }; 1809a001fc1SVitaly Wool 1819a001fc1SVitaly Wool /***************** 1829a001fc1SVitaly Wool * Helpers 1839a001fc1SVitaly Wool *****************/ 1849a001fc1SVitaly Wool 1859a001fc1SVitaly Wool /* Converts an allocation size in bytes to size in z3fold chunks */ 1869a001fc1SVitaly Wool static int size_to_chunks(size_t size) 1879a001fc1SVitaly Wool { 1889a001fc1SVitaly Wool return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 1899a001fc1SVitaly Wool } 1909a001fc1SVitaly Wool 1919a001fc1SVitaly Wool #define for_each_unbuddied_list(_iter, _begin) \ 1929a001fc1SVitaly Wool for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) 1939a001fc1SVitaly Wool 194d30561c5SVitaly Wool static void compact_page_work(struct 
work_struct *w); 195d30561c5SVitaly Wool 196bb9f6f63SVitaly Wool static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool, 197bb9f6f63SVitaly Wool gfp_t gfp) 1987c2b8baaSVitaly Wool { 199f1549cb5SHenry Burns struct z3fold_buddy_slots *slots; 200f1549cb5SHenry Burns 201f1549cb5SHenry Burns slots = kmem_cache_alloc(pool->c_handle, 202f1549cb5SHenry Burns (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE))); 2037c2b8baaSVitaly Wool 2047c2b8baaSVitaly Wool if (slots) { 2057c2b8baaSVitaly Wool memset(slots->slot, 0, sizeof(slots->slot)); 2067c2b8baaSVitaly Wool slots->pool = (unsigned long)pool; 2077c2b8baaSVitaly Wool } 2087c2b8baaSVitaly Wool 2097c2b8baaSVitaly Wool return slots; 2107c2b8baaSVitaly Wool } 2117c2b8baaSVitaly Wool 2127c2b8baaSVitaly Wool static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s) 2137c2b8baaSVitaly Wool { 2147c2b8baaSVitaly Wool return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK); 2157c2b8baaSVitaly Wool } 2167c2b8baaSVitaly Wool 2177c2b8baaSVitaly Wool static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle) 2187c2b8baaSVitaly Wool { 2197c2b8baaSVitaly Wool return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1)); 2207c2b8baaSVitaly Wool } 2217c2b8baaSVitaly Wool 2227c2b8baaSVitaly Wool static inline void free_handle(unsigned long handle) 2237c2b8baaSVitaly Wool { 2247c2b8baaSVitaly Wool struct z3fold_buddy_slots *slots; 2257c2b8baaSVitaly Wool int i; 2267c2b8baaSVitaly Wool bool is_free; 2277c2b8baaSVitaly Wool 2287c2b8baaSVitaly Wool if (handle & (1 << PAGE_HEADLESS)) 2297c2b8baaSVitaly Wool return; 2307c2b8baaSVitaly Wool 2317c2b8baaSVitaly Wool WARN_ON(*(unsigned long *)handle == 0); 2327c2b8baaSVitaly Wool *(unsigned long *)handle = 0; 2337c2b8baaSVitaly Wool slots = handle_to_slots(handle); 2347c2b8baaSVitaly Wool is_free = true; 2357c2b8baaSVitaly Wool for (i = 0; i <= BUDDY_MASK; i++) { 2367c2b8baaSVitaly Wool if (slots->slot[i]) { 2377c2b8baaSVitaly Wool is_free 
= false; 2387c2b8baaSVitaly Wool break; 2397c2b8baaSVitaly Wool } 2407c2b8baaSVitaly Wool } 2417c2b8baaSVitaly Wool 2427c2b8baaSVitaly Wool if (is_free) { 2437c2b8baaSVitaly Wool struct z3fold_pool *pool = slots_to_pool(slots); 2447c2b8baaSVitaly Wool 2457c2b8baaSVitaly Wool kmem_cache_free(pool->c_handle, slots); 2467c2b8baaSVitaly Wool } 2477c2b8baaSVitaly Wool } 2487c2b8baaSVitaly Wool 249ea8157abSDavid Howells static int z3fold_init_fs_context(struct fs_context *fc) 2501f862989SVitaly Wool { 251ea8157abSDavid Howells return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM; 2521f862989SVitaly Wool } 2531f862989SVitaly Wool 2541f862989SVitaly Wool static struct file_system_type z3fold_fs = { 2551f862989SVitaly Wool .name = "z3fold", 256ea8157abSDavid Howells .init_fs_context = z3fold_init_fs_context, 2571f862989SVitaly Wool .kill_sb = kill_anon_super, 2581f862989SVitaly Wool }; 2591f862989SVitaly Wool 2601f862989SVitaly Wool static struct vfsmount *z3fold_mnt; 2611f862989SVitaly Wool static int z3fold_mount(void) 2621f862989SVitaly Wool { 2631f862989SVitaly Wool int ret = 0; 2641f862989SVitaly Wool 2651f862989SVitaly Wool z3fold_mnt = kern_mount(&z3fold_fs); 2661f862989SVitaly Wool if (IS_ERR(z3fold_mnt)) 2671f862989SVitaly Wool ret = PTR_ERR(z3fold_mnt); 2681f862989SVitaly Wool 2691f862989SVitaly Wool return ret; 2701f862989SVitaly Wool } 2711f862989SVitaly Wool 2721f862989SVitaly Wool static void z3fold_unmount(void) 2731f862989SVitaly Wool { 2741f862989SVitaly Wool kern_unmount(z3fold_mnt); 2751f862989SVitaly Wool } 2761f862989SVitaly Wool 2771f862989SVitaly Wool static const struct address_space_operations z3fold_aops; 2781f862989SVitaly Wool static int z3fold_register_migration(struct z3fold_pool *pool) 2791f862989SVitaly Wool { 2801f862989SVitaly Wool pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb); 2811f862989SVitaly Wool if (IS_ERR(pool->inode)) { 2821f862989SVitaly Wool pool->inode = NULL; 2831f862989SVitaly Wool return 1; 2841f862989SVitaly Wool } 
2851f862989SVitaly Wool 2861f862989SVitaly Wool pool->inode->i_mapping->private_data = pool; 2871f862989SVitaly Wool pool->inode->i_mapping->a_ops = &z3fold_aops; 2881f862989SVitaly Wool return 0; 2891f862989SVitaly Wool } 2901f862989SVitaly Wool 2911f862989SVitaly Wool static void z3fold_unregister_migration(struct z3fold_pool *pool) 2921f862989SVitaly Wool { 2931f862989SVitaly Wool if (pool->inode) 2941f862989SVitaly Wool iput(pool->inode); 2951f862989SVitaly Wool } 2961f862989SVitaly Wool 2979a001fc1SVitaly Wool /* Initializes the z3fold header of a newly allocated z3fold page */ 298d30561c5SVitaly Wool static struct z3fold_header *init_z3fold_page(struct page *page, 299bb9f6f63SVitaly Wool struct z3fold_pool *pool, gfp_t gfp) 3009a001fc1SVitaly Wool { 3019a001fc1SVitaly Wool struct z3fold_header *zhdr = page_address(page); 302bb9f6f63SVitaly Wool struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp); 3037c2b8baaSVitaly Wool 3047c2b8baaSVitaly Wool if (!slots) 3057c2b8baaSVitaly Wool return NULL; 3069a001fc1SVitaly Wool 3079a001fc1SVitaly Wool INIT_LIST_HEAD(&page->lru); 3089a001fc1SVitaly Wool clear_bit(PAGE_HEADLESS, &page->private); 3099a001fc1SVitaly Wool clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 310d30561c5SVitaly Wool clear_bit(NEEDS_COMPACTING, &page->private); 311d30561c5SVitaly Wool clear_bit(PAGE_STALE, &page->private); 312ca0246bbSVitaly Wool clear_bit(PAGE_CLAIMED, &page->private); 3139a001fc1SVitaly Wool 3142f1e5e4dSVitaly Wool spin_lock_init(&zhdr->page_lock); 3155a27aa82SVitaly Wool kref_init(&zhdr->refcount); 3169a001fc1SVitaly Wool zhdr->first_chunks = 0; 3179a001fc1SVitaly Wool zhdr->middle_chunks = 0; 3189a001fc1SVitaly Wool zhdr->last_chunks = 0; 3199a001fc1SVitaly Wool zhdr->first_num = 0; 3209a001fc1SVitaly Wool zhdr->start_middle = 0; 321d30561c5SVitaly Wool zhdr->cpu = -1; 3227c2b8baaSVitaly Wool zhdr->slots = slots; 323bb9a374dSVitaly Wool zhdr->pool = pool; 3249a001fc1SVitaly Wool INIT_LIST_HEAD(&zhdr->buddy); 
325d30561c5SVitaly Wool INIT_WORK(&zhdr->work, compact_page_work); 3269a001fc1SVitaly Wool return zhdr; 3279a001fc1SVitaly Wool } 3289a001fc1SVitaly Wool 3299a001fc1SVitaly Wool /* Resets the struct page fields and frees the page */ 3301f862989SVitaly Wool static void free_z3fold_page(struct page *page, bool headless) 3319a001fc1SVitaly Wool { 3321f862989SVitaly Wool if (!headless) { 3331f862989SVitaly Wool lock_page(page); 3341f862989SVitaly Wool __ClearPageMovable(page); 3351f862989SVitaly Wool unlock_page(page); 3361f862989SVitaly Wool } 3371f862989SVitaly Wool ClearPagePrivate(page); 3385a27aa82SVitaly Wool __free_page(page); 3395a27aa82SVitaly Wool } 3405a27aa82SVitaly Wool 3412f1e5e4dSVitaly Wool /* Lock a z3fold page */ 3422f1e5e4dSVitaly Wool static inline void z3fold_page_lock(struct z3fold_header *zhdr) 3432f1e5e4dSVitaly Wool { 3442f1e5e4dSVitaly Wool spin_lock(&zhdr->page_lock); 3452f1e5e4dSVitaly Wool } 3462f1e5e4dSVitaly Wool 34776e32a2aSVitaly Wool /* Try to lock a z3fold page */ 34876e32a2aSVitaly Wool static inline int z3fold_page_trylock(struct z3fold_header *zhdr) 34976e32a2aSVitaly Wool { 35076e32a2aSVitaly Wool return spin_trylock(&zhdr->page_lock); 35176e32a2aSVitaly Wool } 35276e32a2aSVitaly Wool 3532f1e5e4dSVitaly Wool /* Unlock a z3fold page */ 3542f1e5e4dSVitaly Wool static inline void z3fold_page_unlock(struct z3fold_header *zhdr) 3552f1e5e4dSVitaly Wool { 3562f1e5e4dSVitaly Wool spin_unlock(&zhdr->page_lock); 3572f1e5e4dSVitaly Wool } 3582f1e5e4dSVitaly Wool 3597c2b8baaSVitaly Wool /* Helper function to build the index */ 3607c2b8baaSVitaly Wool static inline int __idx(struct z3fold_header *zhdr, enum buddy bud) 3617c2b8baaSVitaly Wool { 3627c2b8baaSVitaly Wool return (bud + zhdr->first_num) & BUDDY_MASK; 3637c2b8baaSVitaly Wool } 3647c2b8baaSVitaly Wool 3659a001fc1SVitaly Wool /* 3669a001fc1SVitaly Wool * Encodes the handle of a particular buddy within a z3fold page 3679a001fc1SVitaly Wool * Pool lock should be held as this function 
accesses first_num 3689a001fc1SVitaly Wool */ 369*3f9d2b57SVitaly Wool static unsigned long __encode_handle(struct z3fold_header *zhdr, 370*3f9d2b57SVitaly Wool struct z3fold_buddy_slots *slots, 371*3f9d2b57SVitaly Wool enum buddy bud) 3729a001fc1SVitaly Wool { 3737c2b8baaSVitaly Wool unsigned long h = (unsigned long)zhdr; 3747c2b8baaSVitaly Wool int idx = 0; 3759a001fc1SVitaly Wool 3767c2b8baaSVitaly Wool /* 3777c2b8baaSVitaly Wool * For a headless page, its handle is its pointer with the extra 3787c2b8baaSVitaly Wool * PAGE_HEADLESS bit set 3797c2b8baaSVitaly Wool */ 3807c2b8baaSVitaly Wool if (bud == HEADLESS) 3817c2b8baaSVitaly Wool return h | (1 << PAGE_HEADLESS); 3827c2b8baaSVitaly Wool 3837c2b8baaSVitaly Wool /* otherwise, return pointer to encoded handle */ 3847c2b8baaSVitaly Wool idx = __idx(zhdr, bud); 3857c2b8baaSVitaly Wool h += idx; 386ca0246bbSVitaly Wool if (bud == LAST) 3877c2b8baaSVitaly Wool h |= (zhdr->last_chunks << BUDDY_SHIFT); 3887c2b8baaSVitaly Wool 3897c2b8baaSVitaly Wool slots->slot[idx] = h; 3907c2b8baaSVitaly Wool return (unsigned long)&slots->slot[idx]; 3919a001fc1SVitaly Wool } 3929a001fc1SVitaly Wool 393*3f9d2b57SVitaly Wool static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) 394*3f9d2b57SVitaly Wool { 395*3f9d2b57SVitaly Wool return __encode_handle(zhdr, zhdr->slots, bud); 396*3f9d2b57SVitaly Wool } 397*3f9d2b57SVitaly Wool 3989a001fc1SVitaly Wool /* Returns the z3fold page where a given handle is stored */ 3991f862989SVitaly Wool static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h) 4009a001fc1SVitaly Wool { 4011f862989SVitaly Wool unsigned long addr = h; 4027c2b8baaSVitaly Wool 4037c2b8baaSVitaly Wool if (!(addr & (1 << PAGE_HEADLESS))) 4041f862989SVitaly Wool addr = *(unsigned long *)h; 4057c2b8baaSVitaly Wool 4067c2b8baaSVitaly Wool return (struct z3fold_header *)(addr & PAGE_MASK); 4079a001fc1SVitaly Wool } 4089a001fc1SVitaly Wool 409ca0246bbSVitaly Wool /* only for LAST bud, 
returns zero otherwise */ 410ca0246bbSVitaly Wool static unsigned short handle_to_chunks(unsigned long handle) 411ca0246bbSVitaly Wool { 4127c2b8baaSVitaly Wool unsigned long addr = *(unsigned long *)handle; 4137c2b8baaSVitaly Wool 4147c2b8baaSVitaly Wool return (addr & ~PAGE_MASK) >> BUDDY_SHIFT; 415ca0246bbSVitaly Wool } 416ca0246bbSVitaly Wool 417f201ebd8Szhong jiang /* 418f201ebd8Szhong jiang * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle 419f201ebd8Szhong jiang * but that doesn't matter. because the masking will result in the 420f201ebd8Szhong jiang * correct buddy number. 421f201ebd8Szhong jiang */ 4229a001fc1SVitaly Wool static enum buddy handle_to_buddy(unsigned long handle) 4239a001fc1SVitaly Wool { 4247c2b8baaSVitaly Wool struct z3fold_header *zhdr; 4257c2b8baaSVitaly Wool unsigned long addr; 4267c2b8baaSVitaly Wool 4277c2b8baaSVitaly Wool WARN_ON(handle & (1 << PAGE_HEADLESS)); 4287c2b8baaSVitaly Wool addr = *(unsigned long *)handle; 4297c2b8baaSVitaly Wool zhdr = (struct z3fold_header *)(addr & PAGE_MASK); 4307c2b8baaSVitaly Wool return (addr - zhdr->first_num) & BUDDY_MASK; 4319a001fc1SVitaly Wool } 4329a001fc1SVitaly Wool 4339050cce1SVitaly Wool static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr) 4349050cce1SVitaly Wool { 435bb9a374dSVitaly Wool return zhdr->pool; 4369050cce1SVitaly Wool } 4379050cce1SVitaly Wool 438d30561c5SVitaly Wool static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) 439d30561c5SVitaly Wool { 440d30561c5SVitaly Wool struct page *page = virt_to_page(zhdr); 4419050cce1SVitaly Wool struct z3fold_pool *pool = zhdr_to_pool(zhdr); 442d30561c5SVitaly Wool 443d30561c5SVitaly Wool WARN_ON(!list_empty(&zhdr->buddy)); 444d30561c5SVitaly Wool set_bit(PAGE_STALE, &page->private); 44535529357SVitaly Wool clear_bit(NEEDS_COMPACTING, &page->private); 446d30561c5SVitaly Wool spin_lock(&pool->lock); 447d30561c5SVitaly Wool if (!list_empty(&page->lru)) 4481f862989SVitaly Wool 
list_del_init(&page->lru); 449d30561c5SVitaly Wool spin_unlock(&pool->lock); 450d30561c5SVitaly Wool if (locked) 451d30561c5SVitaly Wool z3fold_page_unlock(zhdr); 452d30561c5SVitaly Wool spin_lock(&pool->stale_lock); 453d30561c5SVitaly Wool list_add(&zhdr->buddy, &pool->stale); 454d30561c5SVitaly Wool queue_work(pool->release_wq, &pool->work); 455d30561c5SVitaly Wool spin_unlock(&pool->stale_lock); 456d30561c5SVitaly Wool } 457d30561c5SVitaly Wool 458d30561c5SVitaly Wool static void __attribute__((__unused__)) 459d30561c5SVitaly Wool release_z3fold_page(struct kref *ref) 460d30561c5SVitaly Wool { 461d30561c5SVitaly Wool struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 462d30561c5SVitaly Wool refcount); 463d30561c5SVitaly Wool __release_z3fold_page(zhdr, false); 464d30561c5SVitaly Wool } 465d30561c5SVitaly Wool 466d30561c5SVitaly Wool static void release_z3fold_page_locked(struct kref *ref) 467d30561c5SVitaly Wool { 468d30561c5SVitaly Wool struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 469d30561c5SVitaly Wool refcount); 470d30561c5SVitaly Wool WARN_ON(z3fold_page_trylock(zhdr)); 471d30561c5SVitaly Wool __release_z3fold_page(zhdr, true); 472d30561c5SVitaly Wool } 473d30561c5SVitaly Wool 474d30561c5SVitaly Wool static void release_z3fold_page_locked_list(struct kref *ref) 475d30561c5SVitaly Wool { 476d30561c5SVitaly Wool struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 477d30561c5SVitaly Wool refcount); 4789050cce1SVitaly Wool struct z3fold_pool *pool = zhdr_to_pool(zhdr); 4799050cce1SVitaly Wool spin_lock(&pool->lock); 480d30561c5SVitaly Wool list_del_init(&zhdr->buddy); 4819050cce1SVitaly Wool spin_unlock(&pool->lock); 482d30561c5SVitaly Wool 483d30561c5SVitaly Wool WARN_ON(z3fold_page_trylock(zhdr)); 484d30561c5SVitaly Wool __release_z3fold_page(zhdr, true); 485d30561c5SVitaly Wool } 486d30561c5SVitaly Wool 487d30561c5SVitaly Wool static void free_pages_work(struct work_struct *w) 488d30561c5SVitaly Wool 
{ 489d30561c5SVitaly Wool struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work); 490d30561c5SVitaly Wool 491d30561c5SVitaly Wool spin_lock(&pool->stale_lock); 492d30561c5SVitaly Wool while (!list_empty(&pool->stale)) { 493d30561c5SVitaly Wool struct z3fold_header *zhdr = list_first_entry(&pool->stale, 494d30561c5SVitaly Wool struct z3fold_header, buddy); 495d30561c5SVitaly Wool struct page *page = virt_to_page(zhdr); 496d30561c5SVitaly Wool 497d30561c5SVitaly Wool list_del(&zhdr->buddy); 498d30561c5SVitaly Wool if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) 499d30561c5SVitaly Wool continue; 500d30561c5SVitaly Wool spin_unlock(&pool->stale_lock); 501d30561c5SVitaly Wool cancel_work_sync(&zhdr->work); 5021f862989SVitaly Wool free_z3fold_page(page, false); 503d30561c5SVitaly Wool cond_resched(); 504d30561c5SVitaly Wool spin_lock(&pool->stale_lock); 505d30561c5SVitaly Wool } 506d30561c5SVitaly Wool spin_unlock(&pool->stale_lock); 507d30561c5SVitaly Wool } 508d30561c5SVitaly Wool 5099a001fc1SVitaly Wool /* 5109a001fc1SVitaly Wool * Returns the number of free chunks in a z3fold page. 5119a001fc1SVitaly Wool * NB: can't be used with HEADLESS pages. 5129a001fc1SVitaly Wool */ 5139a001fc1SVitaly Wool static int num_free_chunks(struct z3fold_header *zhdr) 5149a001fc1SVitaly Wool { 5159a001fc1SVitaly Wool int nfree; 5169a001fc1SVitaly Wool /* 5179a001fc1SVitaly Wool * If there is a middle object, pick up the bigger free space 5189a001fc1SVitaly Wool * either before or after it. Otherwise just subtract the number 5199a001fc1SVitaly Wool * of chunks occupied by the first and the last objects. 5209a001fc1SVitaly Wool */ 5219a001fc1SVitaly Wool if (zhdr->middle_chunks != 0) { 5229a001fc1SVitaly Wool int nfree_before = zhdr->first_chunks ? 523ede93213SVitaly Wool 0 : zhdr->start_middle - ZHDR_CHUNKS; 5249a001fc1SVitaly Wool int nfree_after = zhdr->last_chunks ? 
525ede93213SVitaly Wool 0 : TOTAL_CHUNKS - 526ede93213SVitaly Wool (zhdr->start_middle + zhdr->middle_chunks); 5279a001fc1SVitaly Wool nfree = max(nfree_before, nfree_after); 5289a001fc1SVitaly Wool } else 5299a001fc1SVitaly Wool nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; 5309a001fc1SVitaly Wool return nfree; 5319a001fc1SVitaly Wool } 5329a001fc1SVitaly Wool 5339050cce1SVitaly Wool /* Add to the appropriate unbuddied list */ 5349050cce1SVitaly Wool static inline void add_to_unbuddied(struct z3fold_pool *pool, 5359050cce1SVitaly Wool struct z3fold_header *zhdr) 5369050cce1SVitaly Wool { 5379050cce1SVitaly Wool if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || 5389050cce1SVitaly Wool zhdr->middle_chunks == 0) { 5399050cce1SVitaly Wool struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied); 5409050cce1SVitaly Wool 5419050cce1SVitaly Wool int freechunks = num_free_chunks(zhdr); 5429050cce1SVitaly Wool spin_lock(&pool->lock); 5439050cce1SVitaly Wool list_add(&zhdr->buddy, &unbuddied[freechunks]); 5449050cce1SVitaly Wool spin_unlock(&pool->lock); 5459050cce1SVitaly Wool zhdr->cpu = smp_processor_id(); 5469050cce1SVitaly Wool put_cpu_ptr(pool->unbuddied); 5479050cce1SVitaly Wool } 5489050cce1SVitaly Wool } 5499050cce1SVitaly Wool 550ede93213SVitaly Wool static inline void *mchunk_memmove(struct z3fold_header *zhdr, 551ede93213SVitaly Wool unsigned short dst_chunk) 552ede93213SVitaly Wool { 553ede93213SVitaly Wool void *beg = zhdr; 554ede93213SVitaly Wool return memmove(beg + (dst_chunk << CHUNK_SHIFT), 555ede93213SVitaly Wool beg + (zhdr->start_middle << CHUNK_SHIFT), 556ede93213SVitaly Wool zhdr->middle_chunks << CHUNK_SHIFT); 557ede93213SVitaly Wool } 558ede93213SVitaly Wool 5591b096e5aSVitaly Wool #define BIG_CHUNK_GAP 3 5609a001fc1SVitaly Wool /* Has to be called with lock held */ 5619a001fc1SVitaly Wool static int z3fold_compact_page(struct z3fold_header *zhdr) 5629a001fc1SVitaly Wool { 5639a001fc1SVitaly Wool struct page *page = 
virt_to_page(zhdr); 5649a001fc1SVitaly Wool 565ede93213SVitaly Wool if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) 566ede93213SVitaly Wool return 0; /* can't move middle chunk, it's used */ 5679a001fc1SVitaly Wool 5681f862989SVitaly Wool if (unlikely(PageIsolated(page))) 5691f862989SVitaly Wool return 0; 5701f862989SVitaly Wool 571ede93213SVitaly Wool if (zhdr->middle_chunks == 0) 572ede93213SVitaly Wool return 0; /* nothing to compact */ 573ede93213SVitaly Wool 574ede93213SVitaly Wool if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { 575ede93213SVitaly Wool /* move to the beginning */ 576ede93213SVitaly Wool mchunk_memmove(zhdr, ZHDR_CHUNKS); 5779a001fc1SVitaly Wool zhdr->first_chunks = zhdr->middle_chunks; 5789a001fc1SVitaly Wool zhdr->middle_chunks = 0; 5799a001fc1SVitaly Wool zhdr->start_middle = 0; 5809a001fc1SVitaly Wool zhdr->first_num++; 581ede93213SVitaly Wool return 1; 5829a001fc1SVitaly Wool } 5839a001fc1SVitaly Wool 5841b096e5aSVitaly Wool /* 5851b096e5aSVitaly Wool * moving data is expensive, so let's only do that if 5861b096e5aSVitaly Wool * there's substantial gain (at least BIG_CHUNK_GAP chunks) 5871b096e5aSVitaly Wool */ 5881b096e5aSVitaly Wool if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && 5891b096e5aSVitaly Wool zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= 5901b096e5aSVitaly Wool BIG_CHUNK_GAP) { 5911b096e5aSVitaly Wool mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS); 5921b096e5aSVitaly Wool zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 5931b096e5aSVitaly Wool return 1; 5941b096e5aSVitaly Wool } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && 5951b096e5aSVitaly Wool TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle 5961b096e5aSVitaly Wool + zhdr->middle_chunks) >= 5971b096e5aSVitaly Wool BIG_CHUNK_GAP) { 5981b096e5aSVitaly Wool unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks - 5991b096e5aSVitaly Wool zhdr->middle_chunks; 6001b096e5aSVitaly Wool mchunk_memmove(zhdr, 
				 new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}

/*
 * Deferred per-page compaction.
 *
 * Runs with the z3fold page header locked on return paths; if @locked is
 * true the caller already holds the page lock (the trylock is then only a
 * sanity check), otherwise the lock is taken here.
 */
static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	/* NEEDS_COMPACTING must have been set by whoever scheduled us */
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	/* detach from whatever unbuddied list the header currently sits on */
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	/* drop the reference taken when compaction was requested (see
	 * z3fold_free()); page may be released right here */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}

	/* leave pages that are being migrated, claimed by reclaim or stale */
	if (unlikely(PageIsolated(page) ||
		     test_bit(PAGE_CLAIMED, &page->private) ||
		     test_bit(PAGE_STALE, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	z3fold_page_unlock(zhdr);
}

/* Workqueue callback: compact the z3fold page embedding this work item. */
static void compact_page_work(struct work_struct *w)
{
	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
						work);

	do_compact_page(zhdr, false);
}

/* returns _locked_ z3fold page header or NULL */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = get_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		zhdr = list_first_entry_or_null(READ_ONCE(l),
				struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		l = &unbuddied[i];
		/* the lockless peek above may have raced; if the entry moved
		 * or its page lock can't be taken, restart the lookup */
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
				struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			spin_unlock(&pool->lock);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		/* a page queued for compaction is not usable here; retry */
		if (test_bit(NEEDS_COMPACTING, &page->private)) {
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	put_cpu_ptr(pool->unbuddied);

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
					struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	return zhdr;
}

/*
 * API Functions
 */

/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name: pool name
 * @gfp: gfp flags when allocating the z3fold pool structure
 * @ops: user-defined operations for the z3fold pool
 *
 * Return: pointer to the new z3fold pool or NULL if the metadata allocation
 * failed.
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
		const struct z3fold_ops *ops)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	/* slab cache for the per-allocation handle slots */
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	/* per-CPU array of NCHUNKS unbuddied list heads; alignment of 2
	 * follows the historical call — NOTE(review): confirm intent */
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->lru);
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	if (z3fold_register_migration(pool))
		goto out_rwq;
	INIT_WORK(&pool->work, free_pages_work);
	pool->ops = ops;
	return pool;

	/* unwind in strict reverse order of acquisition */
out_rwq:
	destroy_workqueue(pool->release_wq);
out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}

/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool: the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);

	/*
	 * We need to destroy pool->compact_wq before pool->release_wq,
	 * as any pending work on pool->compact_wq will call
	 * queue_work(pool->release_wq, &pool->work).
	 *
	 * There are still outstanding pages until both workqueues are drained,
	 * so we cannot unregister migration until then.
	 */

	destroy_workqueue(pool->compact_wq);
	destroy_workqueue(pool->release_wq);
	z3fold_unregister_migration(pool);
	kfree(pool);
}

/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool: z3fold pool from which to allocate
 * @size: size in bytes of the desired allocation
 * @gfp: gfp flags used if the pool needs to grow
 * @handle: handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request. A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
 * as z3fold pool pages.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
 * a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size)
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	/* too big to share a page with the header: give it a whole
	 * ("headless") page of its own */
	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			/* __z3fold_alloc() returned the header locked;
			 * pick which of the three buddies to fill */
			if (zhdr->first_chunks == 0) {
				if (zhdr->middle_chunks != 0 &&
				    chunks >= zhdr->start_middle)
					bud = LAST;
				else
					bud = FIRST;
			} else if (zhdr->last_chunks == 0)
				bud = LAST;
			else if (zhdr->middle_chunks == 0)
				bud = MIDDLE;
			else {
				/* all three buddies busy — shouldn't be on
				 * an unbuddied list; drop it and retry */
				if (kref_put(&zhdr->refcount,
					     release_z3fold_page_locked))
					atomic64_dec(&pool->pages_nr);
				else
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = NULL;
	if (can_sleep) {
		spin_lock(&pool->stale_lock);
		zhdr = list_first_entry_or_null(&pool->stale,
						struct z3fold_header, buddy);
		/*
		 * Before allocating a page, let's see if we can take one from
		 * the stale pages list. cancel_work_sync() can sleep so we
		 * limit this case to the contexts where we can sleep
		 */
		if (zhdr) {
			list_del(&zhdr->buddy);
			spin_unlock(&pool->stale_lock);
			cancel_work_sync(&zhdr->work);
			page = virt_to_page(zhdr);
		} else {
			spin_unlock(&pool->stale_lock);
		}
	}
	if (!page)
		page = alloc_page(gfp);

	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	/* mark the page movable for compaction/migration; requires the
	 * struct page lock, so it is best-effort in atomic context */
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, pool->inode->i_mapping);
		unlock_page(page);
	} else {
		if (trylock_page(page)) {
			__SetPageMovable(page, pool->inode->i_mapping);
			unlock_page(page);
		}
	}
	z3fold_page_lock(zhdr);

found:
	/* page lock is held here for the non-headless case */
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	/* Add/move z3fold page to beginning of LRU */
	if (!list_empty(&page->lru))
		list_del(&page->lru);

	list_add(&page->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}

/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool: pool in which the allocation resided
 * @handle: handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is under
 * reclaim, as indicated by the PG_reclaim flag being set, this function
 * only sets the first|last_chunks to 0.
 * The page is actually freed
 * once both buddies are evicted (see z3fold_reclaim_page() below).
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;

	zhdr = handle_to_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
			spin_lock(&pool->lock);
			list_del(&page->lru);
			spin_unlock(&pool->lock);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	z3fold_page_lock(zhdr);
	bud = handle_to_buddy(handle);

	/* mark the buddy this handle occupied as free */
	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		z3fold_page_unlock(zhdr);
		return;
	}

	free_handle(handle);
	/* last reference gone: the whole page is released here */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	/* reclaim owns this page now; let it finish the job */
	if (test_bit(PAGE_CLAIMED, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	/* already isolated for migration, or compaction already pending */
	if (unlikely(PageIsolated(page)) ||
	    test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		/* no usable home CPU: compact synchronously, keeping the
		 * page lock we already hold */
		spin_lock(&pool->lock);
		list_del_init(&zhdr->buddy);
		spin_unlock(&pool->lock);
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		do_compact_page(zhdr, true);
		return;
	}
	/* extra reference is dropped by do_compact_page() via the work */
	kref_get(&zhdr->refcount);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	z3fold_page_unlock(zhdr);
}

/**
 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool: pool from which a page will attempt to be evicted
 * @retries: number of pages on the LRU list for which eviction will
 *	be attempted before
 *	failing
 *
 * z3fold reclaim is different from normal system reclaim in that it is done
 * from the bottom, up. This is because only the bottom layer, z3fold, has
 * information on how the allocations are organized within each z3fold page.
 * This has the potential to create interesting locking situations between
 * z3fold and the user, however.
 *
 * To avoid these, this is how z3fold_reclaim_page() should be called:
 *
 * The user detects a page should be reclaimed and calls z3fold_reclaim_page().
 * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and
 * call the user-defined eviction handler with the pool and handle as
 * arguments.
 *
 * If the handle can not be evicted, the eviction handler should return
 * non-zero. z3fold_reclaim_page() will add the z3fold page back to the
 * appropriate list and try the next z3fold page on the LRU up to
 * a user defined number of retries.
 *
 * If the handle is successfully evicted, the eviction handler should
 * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free()
 * contains logic to delay freeing the page if the page is under reclaim,
 * as indicated by the setting of the PG_reclaim flag on the underlying page.
 *
 * If all buddies in the z3fold page are successfully evicted, then the
 * z3fold page can be freed.
 *
 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
 * no pages to evict or an eviction handler is not registered, -EAGAIN if
 * the retry limit was hit.
 */
static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
{
	int i, ret = 0;
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	struct list_head *pos;
	/* on-stack copy of the handle slots, so handles stay valid even if
	 * z3fold_free() zeroes zhdr->slots while we are evicting */
	struct z3fold_buddy_slots slots;
	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		/* walk the LRU from its tail (the coldest pages) */
		list_for_each_prev(pos, &pool->lru) {
			page = list_entry(pos, struct page, lru);

			/* this bit could have been set by free, in which case
			 * we pass over to the next page in the pool.
			 */
			if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
				page = NULL;
				continue;
			}

			if (unlikely(PageIsolated(page))) {
				clear_bit(PAGE_CLAIMED, &page->private);
				page = NULL;
				continue;
			}
			zhdr = page_address(page);
			if (test_bit(PAGE_HEADLESS, &page->private))
				break;

			if (!z3fold_page_trylock(zhdr)) {
				clear_bit(PAGE_CLAIMED, &page->private);
				zhdr = NULL;
				continue; /* can't evict at this point */
			}
			kref_get(&zhdr->refcount);
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			break;
		}

		if (!zhdr)
			break;

		list_del_init(&page->lru);
		spin_unlock(&pool->lock);

		if (!test_bit(PAGE_HEADLESS, &page->private)) {
			/*
			 * We need encode the handles before unlocking, and
			 * use our local slots structure because z3fold_free
			 * can zero out zhdr->slots and we can't do much
			 * about that
			 */
			first_handle = 0;
			last_handle = 0;
			middle_handle = 0;
			if (zhdr->first_chunks)
				first_handle = __encode_handle(zhdr, &slots,
								FIRST);
			if (zhdr->middle_chunks)
				middle_handle = __encode_handle(zhdr, &slots,
								MIDDLE);
			if (zhdr->last_chunks)
				last_handle = __encode_handle(zhdr, &slots,
								LAST);
			/*
			 * it's safe to unlock here because we hold a
			 * reference to this page
			 */
			z3fold_page_unlock(zhdr);
		} else {
			first_handle = __encode_handle(zhdr, &slots, HEADLESS);
			last_handle = middle_handle = 0;
		}

		/* Issue the eviction callback(s) */
		if (middle_handle) {
			ret = pool->ops->evict(pool, middle_handle);
			if (ret)
				goto next;
		}
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
		}
next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				free_z3fold_page(page, true);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			clear_bit(PAGE_CLAIMED, &page->private);
		} else {
			z3fold_page_lock(zhdr);
			if (kref_put(&zhdr->refcount,
					release_z3fold_page_locked)) {
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/*
			 * if we are here, the page is still not completely
			 * free. Take the global pool lock then to be able
			 * to add it back to the lru list
			 */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			z3fold_page_unlock(zhdr);
			clear_bit(PAGE_CLAIMED, &page->private);
		}

		/* We started off locked to we need to lock the pool back */
		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}

/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool: pool in which the allocation resides
 * @handle: handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from handle and constructs the pointer to the
 * correct starting chunk within the page.
12489a001fc1SVitaly Wool * 12499a001fc1SVitaly Wool * Returns: a pointer to the mapped allocation 12509a001fc1SVitaly Wool */ 12519a001fc1SVitaly Wool static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) 12529a001fc1SVitaly Wool { 12539a001fc1SVitaly Wool struct z3fold_header *zhdr; 12549a001fc1SVitaly Wool struct page *page; 12559a001fc1SVitaly Wool void *addr; 12569a001fc1SVitaly Wool enum buddy buddy; 12579a001fc1SVitaly Wool 12589a001fc1SVitaly Wool zhdr = handle_to_z3fold_header(handle); 12599a001fc1SVitaly Wool addr = zhdr; 12609a001fc1SVitaly Wool page = virt_to_page(zhdr); 12619a001fc1SVitaly Wool 12629a001fc1SVitaly Wool if (test_bit(PAGE_HEADLESS, &page->private)) 12639a001fc1SVitaly Wool goto out; 12649a001fc1SVitaly Wool 12652f1e5e4dSVitaly Wool z3fold_page_lock(zhdr); 12669a001fc1SVitaly Wool buddy = handle_to_buddy(handle); 12679a001fc1SVitaly Wool switch (buddy) { 12689a001fc1SVitaly Wool case FIRST: 12699a001fc1SVitaly Wool addr += ZHDR_SIZE_ALIGNED; 12709a001fc1SVitaly Wool break; 12719a001fc1SVitaly Wool case MIDDLE: 12729a001fc1SVitaly Wool addr += zhdr->start_middle << CHUNK_SHIFT; 12739a001fc1SVitaly Wool set_bit(MIDDLE_CHUNK_MAPPED, &page->private); 12749a001fc1SVitaly Wool break; 12759a001fc1SVitaly Wool case LAST: 1276ca0246bbSVitaly Wool addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); 12779a001fc1SVitaly Wool break; 12789a001fc1SVitaly Wool default: 12799a001fc1SVitaly Wool pr_err("unknown buddy id %d\n", buddy); 12809a001fc1SVitaly Wool WARN_ON(1); 12819a001fc1SVitaly Wool addr = NULL; 12829a001fc1SVitaly Wool break; 12839a001fc1SVitaly Wool } 12842f1e5e4dSVitaly Wool 12851f862989SVitaly Wool if (addr) 12861f862989SVitaly Wool zhdr->mapped_count++; 12872f1e5e4dSVitaly Wool z3fold_page_unlock(zhdr); 12889a001fc1SVitaly Wool out: 12899a001fc1SVitaly Wool return addr; 12909a001fc1SVitaly Wool } 12919a001fc1SVitaly Wool 12929a001fc1SVitaly Wool /** 12939a001fc1SVitaly Wool * z3fold_unmap() - unmaps the 
allocation associated with the given handle 12949a001fc1SVitaly Wool * @pool: pool in which the allocation resides 12959a001fc1SVitaly Wool * @handle: handle associated with the allocation to be unmapped 12969a001fc1SVitaly Wool */ 12979a001fc1SVitaly Wool static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle) 12989a001fc1SVitaly Wool { 12999a001fc1SVitaly Wool struct z3fold_header *zhdr; 13009a001fc1SVitaly Wool struct page *page; 13019a001fc1SVitaly Wool enum buddy buddy; 13029a001fc1SVitaly Wool 13039a001fc1SVitaly Wool zhdr = handle_to_z3fold_header(handle); 13049a001fc1SVitaly Wool page = virt_to_page(zhdr); 13059a001fc1SVitaly Wool 13062f1e5e4dSVitaly Wool if (test_bit(PAGE_HEADLESS, &page->private)) 13079a001fc1SVitaly Wool return; 13089a001fc1SVitaly Wool 13092f1e5e4dSVitaly Wool z3fold_page_lock(zhdr); 13109a001fc1SVitaly Wool buddy = handle_to_buddy(handle); 13119a001fc1SVitaly Wool if (buddy == MIDDLE) 13129a001fc1SVitaly Wool clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 13131f862989SVitaly Wool zhdr->mapped_count--; 13142f1e5e4dSVitaly Wool z3fold_page_unlock(zhdr); 13159a001fc1SVitaly Wool } 13169a001fc1SVitaly Wool 13179a001fc1SVitaly Wool /** 13189a001fc1SVitaly Wool * z3fold_get_pool_size() - gets the z3fold pool size in pages 13199a001fc1SVitaly Wool * @pool: pool whose size is being queried 13209a001fc1SVitaly Wool * 132112d59ae6SVitaly Wool * Returns: size in pages of the given pool. 
 */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}

static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	/*
	 * Headless pages have no z3fold header to migrate, and claimed
	 * pages are already owned by the reclaim path: refuse both.
	 */
	if (test_bit(PAGE_HEADLESS, &page->private) ||
	    test_bit(PAGE_CLAIMED, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	/* Pages queued for compaction or already stale cannot be isolated. */
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	pool = zhdr_to_pool(zhdr);

	/* Only unmapped pages may move; a live mapping pins the page. */
	if (zhdr->mapped_count == 0) {
		/*
		 * Pin the header and unhook the page from the pool's
		 * unbuddied and LRU lists so migration owns it exclusively.
		 */
		kref_get(&zhdr->refcount);
		if (!list_empty(&zhdr->buddy))
			list_del_init(&zhdr->buddy);
		spin_lock(&pool->lock);
		if (!list_empty(&page->lru))
			list_del(&page->lru);
		spin_unlock(&pool->lock);
		z3fold_page_unlock(zhdr);
		return true;
	}
out:
	z3fold_page_unlock(zhdr);
	return false;
}

static int
z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;
	struct address_space *new_mapping;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr)) {
		return -EAGAIN;
	}
	/* A live mapping pins the page in place; the caller may retry. */
	if (zhdr->mapped_count != 0) {
		z3fold_page_unlock(zhdr);
		return -EBUSY;
	}
	/* Pending compaction work still references this header. */
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	/* Copy the whole page: the z3fold header plus all stored objects. */
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	page->private = 0;
	z3fold_page_unlock(zhdr);
	/* The spinlock and work were memcpy'd; reinitialize them fresh. */
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	new_mapping = page_mapping(page);
	__ClearPageMovable(page);
	ClearPagePrivate(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	/* Re-encode handles so they point into the new page. */
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	spin_lock(&pool->lock);
	list_add(&newpage->lru, &pool->lru);
	spin_unlock(&pool->lock);
	__SetPageMovable(newpage, new_mapping);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	page_mapcount_reset(page);
	put_page(page);
	return 0;
}

static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);

	INIT_LIST_HEAD(&page->lru);
	/* Drop the reference taken at isolation; may free the page. */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	/* Page survived: put it back on the pool LRU. */
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	spin_unlock(&pool->lock);
	z3fold_page_unlock(zhdr);
}

/* Movable-page callbacks used by the page migration core. */
static const struct address_space_operations z3fold_aops = {
	.isolate_page = z3fold_page_isolate,
	.migratepage = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};

/*****************
 * zpool
 ****************/

/* Forward an eviction request to the zpool user's callback, if any. */
static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
{
	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
		return pool->zpool_ops->evict(pool->zpool, handle);
	else
		return -ENOENT;
}

static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};

static void *z3fold_zpool_create(const char *name, gfp_t gfp,
			       const struct zpool_ops *zpool_ops,
			       struct zpool *zpool)
{
	struct z3fold_pool *pool;

	/* Only wire up eviction if the zpool user supplied callbacks. */
	pool = z3fold_create_pool(name, gfp,
			zpool_ops ?
			&z3fold_zpool_ops : NULL);
	if (pool) {
		pool->zpool = zpool;
		pool->zpool_ops = zpool_ops;
	}
	return pool;
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}

static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}

/*
 * Reclaim up to @pages pages; reports the number actually reclaimed
 * via @reclaimed and returns 0 or the last (negative) reclaim error.
 */
static int z3fold_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	unsigned int total = 0;
	int ret = -EINVAL;

	while (total < pages) {
		/* 8 is the per-page eviction retry budget. */
		ret = z3fold_reclaim_page(pool, 8);
		if (ret < 0)
			break;
		total++;
	}

	if (reclaimed)
		*reclaimed = total;

	return ret;
}

static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
static
void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}

static u64 z3fold_zpool_total_size(void *pool)
{
	/* pages_nr counter scaled to bytes. */
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}

static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.shrink =	z3fold_zpool_shrink,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_size =	z3fold_zpool_total_size,
};

MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
	int ret;

	/* Make sure the z3fold header is not larger than the page size */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
	/* Mount the pseudo-fs backing the movable-page mapping first. */
	ret = z3fold_mount();
	if (ret)
		return ret;

	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}

static void __exit exit_z3fold(void)
{
	/* Reverse order of init_z3fold(). */
	z3fold_unmount();
	zpool_unregister_driver(&z3fold_zpool_driver);
}

module_init(init_z3fold);
module_exit(exit_z3fold);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");