// SPDX-License-Identifier: GPL-2.0-only
/*
 * z3fold.c
 *
 * Author: Vitaly Wool <vitaly.wool@konsulko.com>
 * Copyright (C) 2016, Sony Mobile Communications Inc.
 *
 * This implementation is based on zbud written by Seth Jennings.
 *
 * z3fold is a special purpose allocator for storing compressed pages. It
 * can store up to three compressed pages per page which improves the
 * compression ratio of zbud while retaining its main concepts (e.g. always
 * storing an integral number of objects per page) and simplicity.
 * It still has simple and deterministic reclaim properties that make it
 * preferable to a higher density approach (with no requirement on integral
 * number of objects per page) when reclaim is used.
 *
 * As in zbud, pages are divided into "chunks". The size of the chunks is
 * fixed at compile time and is determined by NCHUNKS_ORDER below.
 *
 * z3fold doesn't export any API and is meant to be used via zpool API.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/migrate.h>
#include <linux/node.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
#include <linux/kmemleak.h>

/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation. It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
 * in the beginning of an allocated page are occupied by z3fold header, so
 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
 * which shows the max number of free chunks in z3fold page, also there will
 * be 63, or 62, respectively, freelists per pool.
 */
#define NCHUNKS_ORDER	6

#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
#define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS		(TOTAL_CHUNKS - ZHDR_CHUNKS)

/* low bits of an encoded handle select the buddy slot (see handle_to_buddy()) */
#define BUDDY_MASK	(0x3)
#define BUDDY_SHIFT	2
/* slots structures must be SLOTS_ALIGN-aligned so handle_to_slots() can
 * recover the structure pointer by masking the handle address
 */
#define SLOTS_ALIGN	(0x40)

/*****************
 * Structures
 *****************/
struct z3fold_pool;

enum buddy {
	HEADLESS = 0,
	FIRST,
	MIDDLE,
	LAST,
	BUDDIES_MAX = LAST
};

struct z3fold_buddy_slots {
	/*
	 * we are using BUDDY_MASK in handle_to_buddy etc. so there should
	 * be enough slots to hold all possible variants
	 */
	unsigned long slot[BUDDY_MASK + 1];
	/* back link to the owning pool; low HANDLE_FLAG_MASK bits carry
	 * per-slots flags (see enum z3fold_handle_flags)
	 */
	unsigned long pool;
	rwlock_t lock;	/* protects reads and writes of slot[] */
};
#define HANDLE_FLAG_MASK	(0x03)

/**
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page, except for HEADLESS pages
 * @buddy:	links the z3fold page into the relevant list in the
 *		pool
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @slots:		pointer to the structure holding buddy slots
 * @pool:		pointer to the containing pool
 * @cpu:		CPU which this page "belongs" to
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	index of the first chunk occupied by the middle buddy
 * @first_num:		the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 * @foreign_handles:	number of handles to this page's objects that live in
 *			another page's slots structure (see
 *			compact_single_buddy())
 */
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_buddy_slots *slots;
	struct z3fold_pool *pool;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
	unsigned short mapped_count:2;
	unsigned short foreign_handles:2;
};

/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
 *		buddies; the list each z3fold page is added to depends on
 *		the size of its free region.
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool.
 * @c_handle:	cache for z3fold_buddy_slots allocation
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @work:	work_struct for safe page release
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular z3fold pool.
 */
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head *unbuddied;
	struct list_head stale;
	atomic64_t pages_nr;
	struct kmem_cache *c_handle;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct work_struct work;
};

/*
 * Internal z3fold page flags, stored in page->private
 */
enum z3fold_page_flags {
	PAGE_HEADLESS = 0,
	MIDDLE_CHUNK_MAPPED,
	NEEDS_COMPACTING,
	PAGE_STALE,
	PAGE_CLAIMED, /* by either reclaim or free */
	PAGE_MIGRATED, /* page is migrated and soon to be released */
};

/*
 * handle flags, go under HANDLE_FLAG_MASK
 */
enum z3fold_handle_flags {
	HANDLES_NOFREE = 0,
};

/*
 * Forward declarations
 */
static struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool);
static void compact_page_work(struct work_struct *w);

/*****************
 * Helpers
 *****************/

/* Converts an allocation size in bytes to size in z3fold chunks */
static int size_to_chunks(size_t size)
{
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}

#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)

/* Allocate a zeroed slots structure from the pool's kmem cache */
static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
							gfp_t gfp)
{
	struct z3fold_buddy_slots *slots = kmem_cache_zalloc(pool->c_handle,
							     gfp);

	if (slots) {
		/* It will be freed separately in free_handle(). */
		kmemleak_not_leak(slots);
		slots->pool = (unsigned long)pool;
		rwlock_init(&slots->lock);
	}

	return slots;
}

/* Recover the pool pointer from slots->pool, dropping the flag bits */
static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
{
	return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
}

/* A handle points into slot[]; mask down to the containing slots structure */
static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
{
	return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
}

/* Lock a z3fold page */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}

/* Try to lock a z3fold page */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}

/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}

/* return locked z3fold page if it's not headless */
static inline struct z3fold_header *get_z3fold_header(unsigned long handle)
{
	struct z3fold_buddy_slots *slots;
	struct z3fold_header *zhdr;
	int locked = 0;

	if (!(handle & (1 << PAGE_HEADLESS))) {
		slots = handle_to_slots(handle);
		do {
			unsigned long addr;

			/* slots->lock keeps the handle word stable while we
			 * dereference it; the page lock itself is only
			 * trylocked to avoid inverting the lock order
			 */
			read_lock(&slots->lock);
			addr = *(unsigned long *)handle;
			zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
			locked = z3fold_page_trylock(zhdr);
			read_unlock(&slots->lock);
			if (locked) {
				struct page *page = virt_to_page(zhdr);

				/* retry if the page is being migrated: the
				 * handle will be re-pointed at the new copy
				 */
				if (!test_bit(PAGE_MIGRATED, &page->private))
					break;
				z3fold_page_unlock(zhdr);
			}
			cpu_relax();
		} while (true);
	} else {
		zhdr = (struct z3fold_header *)(handle & PAGE_MASK);
	}

	return zhdr;
}

/* Drop the page lock taken by get_z3fold_header() (no-op for headless) */
static inline void put_z3fold_header(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (!test_bit(PAGE_HEADLESS, &page->private))
		z3fold_page_unlock(zhdr);
}

/*
 * Clear the slot a handle points at and, when that leaves the slots
 * structure with no live handles, free it back to the pool's kmem cache.
 * HANDLES_NOFREE slots structures are never freed here.
 */
static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr)
{
	struct z3fold_buddy_slots *slots;
	int i;
	bool is_free;

	if (WARN_ON(*(unsigned long *)handle == 0))
		return;

	slots = handle_to_slots(handle);
	write_lock(&slots->lock);
	*(unsigned long *)handle = 0;

	if (test_bit(HANDLES_NOFREE, &slots->pool)) {
		write_unlock(&slots->lock);
		return; /* simple case, nothing else to do */
	}

	/* a handle living in another page's slots is a "foreign" one */
	if (zhdr->slots != slots)
		zhdr->foreign_handles--;

	is_free = true;
	for (i = 0; i <= BUDDY_MASK; i++) {
		if (slots->slot[i]) {
			is_free = false;
			break;
		}
	}
	write_unlock(&slots->lock);

	if (is_free) {
		struct z3fold_pool *pool = slots_to_pool(slots);

		if (zhdr->slots == slots)
			zhdr->slots = NULL;
		kmem_cache_free(pool->c_handle, slots);
	}
}

/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
					struct z3fold_pool *pool, gfp_t gfp)
{
	struct z3fold_header *zhdr = page_address(page);
	struct z3fold_buddy_slots *slots;

	clear_bit(PAGE_HEADLESS, &page->private);
	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	clear_bit(PAGE_STALE, &page->private);
	clear_bit(PAGE_CLAIMED, &page->private);
	clear_bit(PAGE_MIGRATED, &page->private);
	/* headless pages carry no header, so no slots and no zhdr setup */
	if (headless)
		return zhdr;

	slots = alloc_slots(pool, gfp);
	if (!slots)
		return NULL;

	memset(zhdr, 0, sizeof(*zhdr));
	spin_lock_init(&zhdr->page_lock);
	kref_init(&zhdr->refcount);
	zhdr->cpu = -1;
	zhdr->slots = slots;
	zhdr->pool = pool;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_WORK(&zhdr->work, compact_page_work);
	return zhdr;
}

/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	__free_page(page);
}

/* Helper function to build the index */
static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
{
	return (bud + zhdr->first_num) & BUDDY_MASK;
}

/*
 * Encodes the handle of a particular buddy within a z3fold page.
 * Zhdr->page_lock should be held as this function accesses first_num
 * if bud != HEADLESS.
 */
static unsigned long __encode_handle(struct z3fold_header *zhdr,
				struct z3fold_buddy_slots *slots,
				enum buddy bud)
{
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	/* for LAST, stash the buddy size so handle_to_chunks() can read it */
	if (bud == LAST)
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	write_lock(&slots->lock);
	slots->slot[idx] = h;
	write_unlock(&slots->lock);
	return (unsigned long)&slots->slot[idx];
}

/* Encode a handle using the page's own slots structure */
static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	return __encode_handle(zhdr, zhdr->slots, bud);
}

/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	/* size is stored in the sub-page bits above BUDDY_SHIFT */
	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}

/*
 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
 * but that doesn't matter. because the masking will result in the
 * correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	return (addr - zhdr->first_num) & BUDDY_MASK;
}

static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}

/* Queue a dead z3fold page for deferred release via free_pages_work() */
static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	/* empty lock/unlock pair: wait out any concurrent holder of
	 * pool->lock before the page goes onto the stale list
	 */
	spin_lock(&pool->lock);
	spin_unlock(&pool->lock);

	if (locked)
		z3fold_page_unlock(zhdr);

	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);

	atomic64_dec(&pool->pages_nr);
}

/* kref release callback: page lock is held by the caller */
static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

/* kref release callback: also unlinks the page from its unbuddied list */
static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
					       refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static inline int put_z3fold_locked(struct z3fold_header *zhdr)
{
	return kref_put(&zhdr->refcount, release_z3fold_page_locked);
}

static inline int put_z3fold_locked_list(struct z3fold_header *zhdr)
{
	return kref_put(&zhdr->refcount, release_z3fold_page_locked_list);
}

/* Workqueue handler: free all pages parked on the pool's stale list */
static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		/* drop the lock around the potentially-sleeping calls */
		spin_unlock(&pool->stale_lock);
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}

/*
 * Returns the number of free chunks in a z3fold page.
 * NB: can't be used with HEADLESS pages.
 */
static int num_free_chunks(struct z3fold_header *zhdr)
{
	int nfree;
	/*
	 * If there is a middle object, pick up the bigger free space
	 * either before or after it. Otherwise just subtract the number
	 * of chunks occupied by the first and the last objects.
	 */
	if (zhdr->middle_chunks != 0) {
		int nfree_before = zhdr->first_chunks ?
			0 : zhdr->start_middle - ZHDR_CHUNKS;
		int nfree_after = zhdr->last_chunks ?
			0 : TOTAL_CHUNKS -
				(zhdr->start_middle + zhdr->middle_chunks);
		nfree = max(nfree_before, nfree_after);
	} else
		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
	return nfree;
}

/* Add to the appropriate unbuddied list */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		struct list_head *unbuddied;
		int freechunks = num_free_chunks(zhdr);

		/* pin to this CPU so zhdr->cpu matches the per-cpu list used */
		migrate_disable();
		unbuddied = this_cpu_ptr(pool->unbuddied);
		spin_lock(&pool->lock);
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		zhdr->cpu = smp_processor_id();
		migrate_enable();
	}
}

/* Choose which buddy of @zhdr can take a @chunks-sized object;
 * HEADLESS means no suitable buddy is free.
 */
static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks)
{
	enum buddy bud = HEADLESS;

	if (zhdr->middle_chunks) {
		if (!zhdr->first_chunks &&
		    chunks <= zhdr->start_middle - ZHDR_CHUNKS)
			bud = FIRST;
		else if (!zhdr->last_chunks)
			bud = LAST;
	} else {
		if (!zhdr->first_chunks)
			bud = FIRST;
		else if (!zhdr->last_chunks)
			bud = LAST;
		else
			bud = MIDDLE;
	}

	return bud;
}

/* Move the middle buddy's data to @dst_chunk within the same page */
static inline void *mchunk_memmove(struct z3fold_header *zhdr,
				unsigned short dst_chunk)
{
	void *beg = zhdr;
	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
		       beg + (zhdr->start_middle << CHUNK_SHIFT),
		       zhdr->middle_chunks << CHUNK_SHIFT);
}

/* true when at most one of the three buddies is in use */
static inline bool buddy_single(struct z3fold_header *zhdr)
{
	return !((zhdr->first_chunks && zhdr->middle_chunks) ||
			(zhdr->first_chunks && zhdr->last_chunks) ||
			(zhdr->middle_chunks && zhdr->last_chunks));
}

/*
 * Move the single used buddy of @zhdr into another z3fold page and re-point
 * its handle at the new location, freeing up @zhdr.  Returns the new page's
 * header (unlocked on success), or NULL on failure.
 * NOTE(review): caller appears to hold @zhdr's page lock (the slots comment
 * below relies on it) — confirm at call sites.
 */
static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	void *p = zhdr;
	unsigned long old_handle = 0;
	size_t sz = 0;
	struct z3fold_header *new_zhdr = NULL;
	int first_idx = __idx(zhdr, FIRST);
	int middle_idx = __idx(zhdr, MIDDLE);
	int last_idx = __idx(zhdr, LAST);
	unsigned short *moved_chunks = NULL;

	/*
	 * No need to protect slots here -- all the slots are "local" and
	 * the page lock is already taken
	 */
	if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) {
		p += ZHDR_SIZE_ALIGNED;
		sz = zhdr->first_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[first_idx];
		moved_chunks = &zhdr->first_chunks;
	} else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) {
		p += zhdr->start_middle << CHUNK_SHIFT;
		sz = zhdr->middle_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[middle_idx];
		moved_chunks = &zhdr->middle_chunks;
	} else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) {
		p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
		sz = zhdr->last_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[last_idx];
		moved_chunks = &zhdr->last_chunks;
	}

	if (sz > 0) {
		enum buddy new_bud = HEADLESS;
		short chunks = size_to_chunks(sz);
		void *q;

		/* returns a locked page with room for the object */
		new_zhdr = __z3fold_alloc(pool, sz, false);
		if (!new_zhdr)
			return NULL;

		if (WARN_ON(new_zhdr == zhdr))
			goto out_fail;

		new_bud = get_free_buddy(new_zhdr, chunks);
		q = new_zhdr;
		switch (new_bud) {
		case FIRST:
			new_zhdr->first_chunks = chunks;
			q += ZHDR_SIZE_ALIGNED;
			break;
		case MIDDLE:
			new_zhdr->middle_chunks = chunks;
			new_zhdr->start_middle =
				new_zhdr->first_chunks + ZHDR_CHUNKS;
			q += new_zhdr->start_middle << CHUNK_SHIFT;
			break;
		case LAST:
			new_zhdr->last_chunks = chunks;
			q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT);
			break;
		default:
			goto out_fail;
		}
		/* the handle stays in the old page's slots => foreign */
		new_zhdr->foreign_handles++;
		memcpy(q, p, sz);
		/* re-point the existing handle at the new location */
		write_lock(&zhdr->slots->lock);
		*(unsigned long *)old_handle = (unsigned long)new_zhdr +
			__idx(new_zhdr, new_bud);
		if (new_bud == LAST)
			*(unsigned long *)old_handle |=
					(new_zhdr->last_chunks << BUDDY_SHIFT);
		write_unlock(&zhdr->slots->lock);
		add_to_unbuddied(pool, new_zhdr);
		z3fold_page_unlock(new_zhdr);

		*moved_chunks = 0;
	}

	return new_zhdr;

out_fail:
	if (new_zhdr && !put_z3fold_locked(new_zhdr)) {
		add_to_unbuddied(pool, new_zhdr);
		z3fold_page_unlock(new_zhdr);
	}
	return NULL;

}

#define BIG_CHUNK_GAP	3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

Wool if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) 693ede93213SVitaly Wool return 0; /* can't move middle chunk, it's used */ 6949a001fc1SVitaly Wool 6951f862989SVitaly Wool if (unlikely(PageIsolated(page))) 6961f862989SVitaly Wool return 0; 6971f862989SVitaly Wool 698ede93213SVitaly Wool if (zhdr->middle_chunks == 0) 699ede93213SVitaly Wool return 0; /* nothing to compact */ 700ede93213SVitaly Wool 701ede93213SVitaly Wool if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { 702ede93213SVitaly Wool /* move to the beginning */ 703ede93213SVitaly Wool mchunk_memmove(zhdr, ZHDR_CHUNKS); 7049a001fc1SVitaly Wool zhdr->first_chunks = zhdr->middle_chunks; 7059a001fc1SVitaly Wool zhdr->middle_chunks = 0; 7069a001fc1SVitaly Wool zhdr->start_middle = 0; 7079a001fc1SVitaly Wool zhdr->first_num++; 708ede93213SVitaly Wool return 1; 7099a001fc1SVitaly Wool } 7109a001fc1SVitaly Wool 7111b096e5aSVitaly Wool /* 7121b096e5aSVitaly Wool * moving data is expensive, so let's only do that if 7131b096e5aSVitaly Wool * there's substantial gain (at least BIG_CHUNK_GAP chunks) 7141b096e5aSVitaly Wool */ 7151b096e5aSVitaly Wool if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && 7161b096e5aSVitaly Wool zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= 7171b096e5aSVitaly Wool BIG_CHUNK_GAP) { 7181b096e5aSVitaly Wool mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS); 7191b096e5aSVitaly Wool zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 7201b096e5aSVitaly Wool return 1; 7211b096e5aSVitaly Wool } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && 7221b096e5aSVitaly Wool TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle 7231b096e5aSVitaly Wool + zhdr->middle_chunks) >= 7241b096e5aSVitaly Wool BIG_CHUNK_GAP) { 7251b096e5aSVitaly Wool unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks - 7261b096e5aSVitaly Wool zhdr->middle_chunks; 7271b096e5aSVitaly Wool mchunk_memmove(zhdr, new_start); 7281b096e5aSVitaly Wool zhdr->start_middle = 
new_start; 7291b096e5aSVitaly Wool return 1; 7301b096e5aSVitaly Wool } 7311b096e5aSVitaly Wool 7321b096e5aSVitaly Wool return 0; 7331b096e5aSVitaly Wool } 7341b096e5aSVitaly Wool 735d30561c5SVitaly Wool static void do_compact_page(struct z3fold_header *zhdr, bool locked) 736d30561c5SVitaly Wool { 7379050cce1SVitaly Wool struct z3fold_pool *pool = zhdr_to_pool(zhdr); 738d30561c5SVitaly Wool struct page *page; 739d30561c5SVitaly Wool 740d30561c5SVitaly Wool page = virt_to_page(zhdr); 741d30561c5SVitaly Wool if (locked) 742d30561c5SVitaly Wool WARN_ON(z3fold_page_trylock(zhdr)); 743d30561c5SVitaly Wool else 744d30561c5SVitaly Wool z3fold_page_lock(zhdr); 7455d03a661SVitaly Wool if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) { 746d30561c5SVitaly Wool z3fold_page_unlock(zhdr); 747d30561c5SVitaly Wool return; 748d30561c5SVitaly Wool } 749d30561c5SVitaly Wool spin_lock(&pool->lock); 750d30561c5SVitaly Wool list_del_init(&zhdr->buddy); 751d30561c5SVitaly Wool spin_unlock(&pool->lock); 752d30561c5SVitaly Wool 75373d47193SRuan Jinjie if (put_z3fold_locked(zhdr)) 7545d03a661SVitaly Wool return; 7555d03a661SVitaly Wool 756dcf5aedbSVitaly Wool if (test_bit(PAGE_STALE, &page->private) || 757dcf5aedbSVitaly Wool test_and_set_bit(PAGE_CLAIMED, &page->private)) { 7581f862989SVitaly Wool z3fold_page_unlock(zhdr); 7591f862989SVitaly Wool return; 7601f862989SVitaly Wool } 7611f862989SVitaly Wool 7624a3ac931SVitaly Wool if (!zhdr->foreign_handles && buddy_single(zhdr) && 7634a3ac931SVitaly Wool zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) { 76473d47193SRuan Jinjie if (!put_z3fold_locked(zhdr)) { 765dcf5aedbSVitaly Wool clear_bit(PAGE_CLAIMED, &page->private); 7664a3ac931SVitaly Wool z3fold_page_unlock(zhdr); 767dcf5aedbSVitaly Wool } 7684a3ac931SVitaly Wool return; 7694a3ac931SVitaly Wool } 7704a3ac931SVitaly Wool 771d30561c5SVitaly Wool z3fold_compact_page(zhdr); 7729050cce1SVitaly Wool add_to_unbuddied(pool, zhdr); 773dcf5aedbSVitaly Wool 
clear_bit(PAGE_CLAIMED, &page->private); 774d30561c5SVitaly Wool z3fold_page_unlock(zhdr); 775d30561c5SVitaly Wool } 776d30561c5SVitaly Wool 777d30561c5SVitaly Wool static void compact_page_work(struct work_struct *w) 778d30561c5SVitaly Wool { 779d30561c5SVitaly Wool struct z3fold_header *zhdr = container_of(w, struct z3fold_header, 780d30561c5SVitaly Wool work); 781d30561c5SVitaly Wool 782d30561c5SVitaly Wool do_compact_page(zhdr, false); 783d30561c5SVitaly Wool } 784d30561c5SVitaly Wool 7859050cce1SVitaly Wool /* returns _locked_ z3fold page header or NULL */ 7869050cce1SVitaly Wool static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool, 7879050cce1SVitaly Wool size_t size, bool can_sleep) 7889050cce1SVitaly Wool { 7899050cce1SVitaly Wool struct z3fold_header *zhdr = NULL; 7909050cce1SVitaly Wool struct page *page; 7919050cce1SVitaly Wool struct list_head *unbuddied; 7929050cce1SVitaly Wool int chunks = size_to_chunks(size), i; 7939050cce1SVitaly Wool 7949050cce1SVitaly Wool lookup: 795135f97fdSVitaly Wool migrate_disable(); 7969050cce1SVitaly Wool /* First, try to find an unbuddied z3fold page. */ 797135f97fdSVitaly Wool unbuddied = this_cpu_ptr(pool->unbuddied); 7989050cce1SVitaly Wool for_each_unbuddied_list(i, chunks) { 7999050cce1SVitaly Wool struct list_head *l = &unbuddied[i]; 8009050cce1SVitaly Wool 8019050cce1SVitaly Wool zhdr = list_first_entry_or_null(READ_ONCE(l), 8029050cce1SVitaly Wool struct z3fold_header, buddy); 8039050cce1SVitaly Wool 8049050cce1SVitaly Wool if (!zhdr) 8059050cce1SVitaly Wool continue; 8069050cce1SVitaly Wool 8079050cce1SVitaly Wool /* Re-check under lock. 
*/ 8089050cce1SVitaly Wool spin_lock(&pool->lock); 8099050cce1SVitaly Wool if (unlikely(zhdr != list_first_entry(READ_ONCE(l), 8109050cce1SVitaly Wool struct z3fold_header, buddy)) || 8119050cce1SVitaly Wool !z3fold_page_trylock(zhdr)) { 8129050cce1SVitaly Wool spin_unlock(&pool->lock); 8139050cce1SVitaly Wool zhdr = NULL; 814135f97fdSVitaly Wool migrate_enable(); 8159050cce1SVitaly Wool if (can_sleep) 8169050cce1SVitaly Wool cond_resched(); 8179050cce1SVitaly Wool goto lookup; 8189050cce1SVitaly Wool } 8199050cce1SVitaly Wool list_del_init(&zhdr->buddy); 8209050cce1SVitaly Wool zhdr->cpu = -1; 8219050cce1SVitaly Wool spin_unlock(&pool->lock); 8229050cce1SVitaly Wool 8239050cce1SVitaly Wool page = virt_to_page(zhdr); 8244a3ac931SVitaly Wool if (test_bit(NEEDS_COMPACTING, &page->private) || 8254a3ac931SVitaly Wool test_bit(PAGE_CLAIMED, &page->private)) { 8269050cce1SVitaly Wool z3fold_page_unlock(zhdr); 8279050cce1SVitaly Wool zhdr = NULL; 828135f97fdSVitaly Wool migrate_enable(); 8299050cce1SVitaly Wool if (can_sleep) 8309050cce1SVitaly Wool cond_resched(); 8319050cce1SVitaly Wool goto lookup; 8329050cce1SVitaly Wool } 8339050cce1SVitaly Wool 8349050cce1SVitaly Wool /* 8359050cce1SVitaly Wool * this page could not be removed from its unbuddied 8369050cce1SVitaly Wool * list while pool lock was held, and then we've taken 8379050cce1SVitaly Wool * page lock so kref_put could not be called before 8389050cce1SVitaly Wool * we got here, so it's safe to just call kref_get() 8399050cce1SVitaly Wool */ 8409050cce1SVitaly Wool kref_get(&zhdr->refcount); 8419050cce1SVitaly Wool break; 8429050cce1SVitaly Wool } 843135f97fdSVitaly Wool migrate_enable(); 8449050cce1SVitaly Wool 845351618b2SVitaly Wool if (!zhdr) { 846351618b2SVitaly Wool int cpu; 847351618b2SVitaly Wool 848351618b2SVitaly Wool /* look for _exact_ match on other cpus' lists */ 849351618b2SVitaly Wool for_each_online_cpu(cpu) { 850351618b2SVitaly Wool struct list_head *l; 851351618b2SVitaly Wool 
852351618b2SVitaly Wool unbuddied = per_cpu_ptr(pool->unbuddied, cpu); 853351618b2SVitaly Wool spin_lock(&pool->lock); 854351618b2SVitaly Wool l = &unbuddied[chunks]; 855351618b2SVitaly Wool 856351618b2SVitaly Wool zhdr = list_first_entry_or_null(READ_ONCE(l), 857351618b2SVitaly Wool struct z3fold_header, buddy); 858351618b2SVitaly Wool 859351618b2SVitaly Wool if (!zhdr || !z3fold_page_trylock(zhdr)) { 860351618b2SVitaly Wool spin_unlock(&pool->lock); 861351618b2SVitaly Wool zhdr = NULL; 862351618b2SVitaly Wool continue; 863351618b2SVitaly Wool } 864351618b2SVitaly Wool list_del_init(&zhdr->buddy); 865351618b2SVitaly Wool zhdr->cpu = -1; 866351618b2SVitaly Wool spin_unlock(&pool->lock); 867351618b2SVitaly Wool 868351618b2SVitaly Wool page = virt_to_page(zhdr); 8694a3ac931SVitaly Wool if (test_bit(NEEDS_COMPACTING, &page->private) || 8704a3ac931SVitaly Wool test_bit(PAGE_CLAIMED, &page->private)) { 871351618b2SVitaly Wool z3fold_page_unlock(zhdr); 872351618b2SVitaly Wool zhdr = NULL; 873351618b2SVitaly Wool if (can_sleep) 874351618b2SVitaly Wool cond_resched(); 875351618b2SVitaly Wool continue; 876351618b2SVitaly Wool } 877351618b2SVitaly Wool kref_get(&zhdr->refcount); 878351618b2SVitaly Wool break; 879351618b2SVitaly Wool } 880351618b2SVitaly Wool } 881351618b2SVitaly Wool 8827c61c35bSMiaohe Lin if (zhdr && !zhdr->slots) { 8834c6bdb36SMiaohe Lin zhdr->slots = alloc_slots(pool, GFP_ATOMIC); 8847c61c35bSMiaohe Lin if (!zhdr->slots) 8857c61c35bSMiaohe Lin goto out_fail; 8867c61c35bSMiaohe Lin } 8879050cce1SVitaly Wool return zhdr; 8887c61c35bSMiaohe Lin 8897c61c35bSMiaohe Lin out_fail: 89073d47193SRuan Jinjie if (!put_z3fold_locked(zhdr)) { 8917c61c35bSMiaohe Lin add_to_unbuddied(pool, zhdr); 8927c61c35bSMiaohe Lin z3fold_page_unlock(zhdr); 8937c61c35bSMiaohe Lin } 8947c61c35bSMiaohe Lin return NULL; 8959050cce1SVitaly Wool } 896d30561c5SVitaly Wool 897d30561c5SVitaly Wool /* 898d30561c5SVitaly Wool * API Functions 899d30561c5SVitaly Wool */ 900d30561c5SVitaly Wool 
/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 *
 * Return: pointer to the new z3fold pool or NULL if the metadata allocation
 * failed.
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	/* one array of NCHUNKS unbuddied lists per CPU */
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head) * NCHUNKS,
					 __alignof__(struct list_head));
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	/* NOTE: the name string is referenced, not copied -- the caller
	 * must keep it alive for the pool's lifetime. */
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	INIT_WORK(&pool->work, free_pages_work);
	return pool;

out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}

/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool:	the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);

	/*
	 * We need to destroy pool->compact_wq before pool->release_wq,
	 * as any pending work on pool->compact_wq will call
	 * queue_work(pool->release_wq, &pool->work).
	 *
	 * There are still outstanding pages until both workqueues are drained,
	 * so we cannot unregister migration until then.
	 */

	destroy_workqueue(pool->compact_wq);
	destroy_workqueue(pool->release_wq);
	free_percpu(pool->unbuddied);
	kfree(pool);
}

static const struct movable_operations z3fold_mops;

/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request. A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
 * a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size || (gfp & __GFP_HIGHMEM))
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		/* too big to share a page: use the whole page headless */
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			bud = get_free_buddy(zhdr, chunks);
			if (bud == HEADLESS) {
				if (!put_z3fold_locked(zhdr))
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = alloc_page(gfp);
	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	/* mark the page movable; __SetPageMovable() requires the page lock */
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, &z3fold_mops);
		unlock_page(page);
	} else {
		WARN_ON(!trylock_page(page));
		__SetPageMovable(page, &z3fold_mops);
		unlock_page(page);
	}
	z3fold_page_lock(zhdr);

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}

/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is under
 * reclaim, as indicated by the PAGE_CLAIMED flag being set, this function
 * only sets the first|middle|last_chunks to 0. The page is actually freed
 * once all buddies are evicted (see z3fold_reclaim_page() below).
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;
	bool page_claimed;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);
	page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!page_claimed) {
			put_z3fold_header(zhdr);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	bud = handle_to_buddy(handle);

	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		put_z3fold_header(zhdr);
		return;
	}

	if (!page_claimed)
		free_handle(handle, zhdr);
	if (put_z3fold_locked_list(zhdr))
		return;
	if (page_claimed) {
		/* the page has not been claimed by us */
		put_z3fold_header(zhdr);
		return;
	}
	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		/* compaction already scheduled by someone else */
		clear_bit(PAGE_CLAIMED, &page->private);
		put_z3fold_header(zhdr);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		/* no valid home CPU: compact synchronously */
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		clear_bit(PAGE_CLAIMED, &page->private);
		do_compact_page(zhdr, true);
		return;
	}
	kref_get(&zhdr->refcount);
	clear_bit(PAGE_CLAIMED, &page->private);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	put_z3fold_header(zhdr);
}

/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from handle and constructs the pointer to the
 * correct starting chunk within the page.
 *
 * Returns: a pointer to the mapped allocation
 */
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		/* pin the middle chunk so compaction won't move it */
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	if (addr)
		zhdr->mapped_count++;
out:
	put_z3fold_header(zhdr);
	return addr;
}

/**
 * z3fold_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */
static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return;

	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	zhdr->mapped_count--;
	put_z3fold_header(zhdr);
}

/**
 * z3fold_get_pool_size() - gets the z3fold pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool.
 */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}

/*
 * Migration callback: decide whether @page may be isolated for
 * migration. Headless, stale, mapped, compacting or foreign-handle
 * pages are refused; on success the page is claimed (PAGE_CLAIMED),
 * removed from its unbuddied list and an extra reference is taken.
 */
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
		goto out;

	if (test_and_set_bit(PAGE_CLAIMED, &page->private))
		goto out;
	pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	kref_get(&zhdr->refcount);
	z3fold_page_unlock(zhdr);
	return true;

out:
	z3fold_page_unlock(zhdr);
	return false;
}

/*
 * Migration callback: copy @page's contents (header included) to
 * @newpage, re-encode the handles so they point into the new page,
 * and schedule compaction of the new page. Returns 0 on success,
 * -EAGAIN/-EBUSY if the page cannot be migrated right now.
 */
static int z3fold_page_migrate(struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr))
		return -EAGAIN;
	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
		clear_bit(PAGE_CLAIMED, &page->private);
		z3fold_page_unlock(zhdr);
		return -EBUSY;
	}
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	set_bit(PAGE_MIGRATED, &page->private);
	z3fold_page_unlock(zhdr);
	/* the copied lock/work state is stale -- reinitialize it */
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	__ClearPageMovable(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	__SetPageMovable(newpage, &z3fold_mops);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	/* PAGE_CLAIMED and PAGE_MIGRATED are cleared now. */
	page->private = 0;
	put_page(page);
	return 0;
}

/*
 * Migration callback: return an isolated-but-not-migrated page to
 * the pool's bookkeeping, dropping the reference and the PAGE_CLAIMED
 * bit taken by z3fold_page_isolate().
 */
static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	if (put_z3fold_locked(zhdr))
		return;
	if (list_empty(&zhdr->buddy))
		add_to_unbuddied(pool, zhdr);
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}

static const struct movable_operations z3fold_mops = {
	.isolate_page = z3fold_page_isolate,
	.migrate_page = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};

/*****************
 * zpool
 ****************/

static void *z3fold_zpool_create(const char *name, gfp_t gfp)
{
	return z3fold_create_pool(name, gfp);
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
13839a001fc1SVitaly Wool } 13849a001fc1SVitaly Wool 13859a001fc1SVitaly Wool static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp, 13869a001fc1SVitaly Wool unsigned long *handle) 13879a001fc1SVitaly Wool { 13889a001fc1SVitaly Wool return z3fold_alloc(pool, size, gfp, handle); 13899a001fc1SVitaly Wool } 13909a001fc1SVitaly Wool static void z3fold_zpool_free(void *pool, unsigned long handle) 13919a001fc1SVitaly Wool { 13929a001fc1SVitaly Wool z3fold_free(pool, handle); 13939a001fc1SVitaly Wool } 13949a001fc1SVitaly Wool 13959a001fc1SVitaly Wool static void *z3fold_zpool_map(void *pool, unsigned long handle, 13969a001fc1SVitaly Wool enum zpool_mapmode mm) 13979a001fc1SVitaly Wool { 13989a001fc1SVitaly Wool return z3fold_map(pool, handle); 13999a001fc1SVitaly Wool } 14009a001fc1SVitaly Wool static void z3fold_zpool_unmap(void *pool, unsigned long handle) 14019a001fc1SVitaly Wool { 14029a001fc1SVitaly Wool z3fold_unmap(pool, handle); 14039a001fc1SVitaly Wool } 14049a001fc1SVitaly Wool 14059a001fc1SVitaly Wool static u64 z3fold_zpool_total_size(void *pool) 14069a001fc1SVitaly Wool { 14079a001fc1SVitaly Wool return z3fold_get_pool_size(pool) * PAGE_SIZE; 14089a001fc1SVitaly Wool } 14099a001fc1SVitaly Wool 14109a001fc1SVitaly Wool static struct zpool_driver z3fold_zpool_driver = { 14119a001fc1SVitaly Wool .type = "z3fold", 1412e818e820STian Tao .sleep_mapped = true, 14139a001fc1SVitaly Wool .owner = THIS_MODULE, 14149a001fc1SVitaly Wool .create = z3fold_zpool_create, 14159a001fc1SVitaly Wool .destroy = z3fold_zpool_destroy, 14169a001fc1SVitaly Wool .malloc = z3fold_zpool_malloc, 14179a001fc1SVitaly Wool .free = z3fold_zpool_free, 14189a001fc1SVitaly Wool .map = z3fold_zpool_map, 14199a001fc1SVitaly Wool .unmap = z3fold_zpool_unmap, 14209a001fc1SVitaly Wool .total_size = z3fold_zpool_total_size, 14219a001fc1SVitaly Wool }; 14229a001fc1SVitaly Wool 14239a001fc1SVitaly Wool MODULE_ALIAS("zpool-z3fold"); 14249a001fc1SVitaly Wool 14259a001fc1SVitaly Wool static int 
__init init_z3fold(void) 14269a001fc1SVitaly Wool { 1427014284a0SMiaohe Lin /* 1428014284a0SMiaohe Lin * Make sure the z3fold header is not larger than the page size and 1429014284a0SMiaohe Lin * there has remaining spaces for its buddy. 1430014284a0SMiaohe Lin */ 1431014284a0SMiaohe Lin BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE); 14329a001fc1SVitaly Wool zpool_register_driver(&z3fold_zpool_driver); 14339a001fc1SVitaly Wool 14349a001fc1SVitaly Wool return 0; 14359a001fc1SVitaly Wool } 14369a001fc1SVitaly Wool 14379a001fc1SVitaly Wool static void __exit exit_z3fold(void) 14389a001fc1SVitaly Wool { 14399a001fc1SVitaly Wool zpool_unregister_driver(&z3fold_zpool_driver); 14409a001fc1SVitaly Wool } 14419a001fc1SVitaly Wool 14429a001fc1SVitaly Wool module_init(init_z3fold); 14439a001fc1SVitaly Wool module_exit(exit_z3fold); 14449a001fc1SVitaly Wool 14459a001fc1SVitaly Wool MODULE_LICENSE("GPL"); 14469a001fc1SVitaly Wool MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>"); 14479a001fc1SVitaly Wool MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages"); 1448