109c434b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 29a001fc1SVitaly Wool /* 39a001fc1SVitaly Wool * z3fold.c 49a001fc1SVitaly Wool * 59a001fc1SVitaly Wool * Author: Vitaly Wool <vitaly.wool@konsulko.com> 69a001fc1SVitaly Wool * Copyright (C) 2016, Sony Mobile Communications Inc. 79a001fc1SVitaly Wool * 89a001fc1SVitaly Wool * This implementation is based on zbud written by Seth Jennings. 99a001fc1SVitaly Wool * 109a001fc1SVitaly Wool * z3fold is an special purpose allocator for storing compressed pages. It 119a001fc1SVitaly Wool * can store up to three compressed pages per page which improves the 129a001fc1SVitaly Wool * compression ratio of zbud while retaining its main concepts (e. g. always 139a001fc1SVitaly Wool * storing an integral number of objects per page) and simplicity. 149a001fc1SVitaly Wool * It still has simple and deterministic reclaim properties that make it 159a001fc1SVitaly Wool * preferable to a higher density approach (with no requirement on integral 169a001fc1SVitaly Wool * number of object per page) when reclaim is used. 179a001fc1SVitaly Wool * 189a001fc1SVitaly Wool * As in zbud, pages are divided into "chunks". The size of the chunks is 199a001fc1SVitaly Wool * fixed at compile time and is determined by NCHUNKS_ORDER below. 209a001fc1SVitaly Wool * 219a001fc1SVitaly Wool * z3fold doesn't export any API and is meant to be used via zpool API. 
229a001fc1SVitaly Wool */ 239a001fc1SVitaly Wool 249a001fc1SVitaly Wool #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 259a001fc1SVitaly Wool 269a001fc1SVitaly Wool #include <linux/atomic.h> 27d30561c5SVitaly Wool #include <linux/sched.h> 281f862989SVitaly Wool #include <linux/cpumask.h> 299a001fc1SVitaly Wool #include <linux/list.h> 309a001fc1SVitaly Wool #include <linux/mm.h> 319a001fc1SVitaly Wool #include <linux/module.h> 321f862989SVitaly Wool #include <linux/page-flags.h> 331f862989SVitaly Wool #include <linux/migrate.h> 341f862989SVitaly Wool #include <linux/node.h> 351f862989SVitaly Wool #include <linux/compaction.h> 36d30561c5SVitaly Wool #include <linux/percpu.h> 371f862989SVitaly Wool #include <linux/mount.h> 38ea8157abSDavid Howells #include <linux/pseudo_fs.h> 391f862989SVitaly Wool #include <linux/fs.h> 409a001fc1SVitaly Wool #include <linux/preempt.h> 41d30561c5SVitaly Wool #include <linux/workqueue.h> 429a001fc1SVitaly Wool #include <linux/slab.h> 439a001fc1SVitaly Wool #include <linux/spinlock.h> 449a001fc1SVitaly Wool #include <linux/zpool.h> 45ea8157abSDavid Howells #include <linux/magic.h> 46af4798a5SQian Cai #include <linux/kmemleak.h> 479a001fc1SVitaly Wool 489a001fc1SVitaly Wool /* 499a001fc1SVitaly Wool * NCHUNKS_ORDER determines the internal allocation granularity, effectively 509a001fc1SVitaly Wool * adjusting internal fragmentation. It also determines the number of 519a001fc1SVitaly Wool * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the 52ede93213SVitaly Wool * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks 53ede93213SVitaly Wool * in the beginning of an allocated page are occupied by z3fold header, so 54ede93213SVitaly Wool * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y), 55ede93213SVitaly Wool * which shows the max number of free chunks in z3fold page, also there will 56ede93213SVitaly Wool * be 63, or 62, respectively, freelists per pool. 
579a001fc1SVitaly Wool */ 589a001fc1SVitaly Wool #define NCHUNKS_ORDER 6 599a001fc1SVitaly Wool 609a001fc1SVitaly Wool #define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) 619a001fc1SVitaly Wool #define CHUNK_SIZE (1 << CHUNK_SHIFT) 62ede93213SVitaly Wool #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) 63ede93213SVitaly Wool #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) 64ede93213SVitaly Wool #define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) 65e3c0db4fSMiaohe Lin #define NCHUNKS (TOTAL_CHUNKS - ZHDR_CHUNKS) 669a001fc1SVitaly Wool 67f201ebd8Szhong jiang #define BUDDY_MASK (0x3) 68ca0246bbSVitaly Wool #define BUDDY_SHIFT 2 697c2b8baaSVitaly Wool #define SLOTS_ALIGN (0x40) 707c2b8baaSVitaly Wool 717c2b8baaSVitaly Wool /***************** 727c2b8baaSVitaly Wool * Structures 737c2b8baaSVitaly Wool *****************/ 747c2b8baaSVitaly Wool struct z3fold_pool; 757c2b8baaSVitaly Wool struct z3fold_ops { 767c2b8baaSVitaly Wool int (*evict)(struct z3fold_pool *pool, unsigned long handle); 777c2b8baaSVitaly Wool }; 787c2b8baaSVitaly Wool 797c2b8baaSVitaly Wool enum buddy { 807c2b8baaSVitaly Wool HEADLESS = 0, 817c2b8baaSVitaly Wool FIRST, 827c2b8baaSVitaly Wool MIDDLE, 837c2b8baaSVitaly Wool LAST, 847c2b8baaSVitaly Wool BUDDIES_MAX = LAST 857c2b8baaSVitaly Wool }; 867c2b8baaSVitaly Wool 877c2b8baaSVitaly Wool struct z3fold_buddy_slots { 887c2b8baaSVitaly Wool /* 897c2b8baaSVitaly Wool * we are using BUDDY_MASK in handle_to_buddy etc. 
so there should 907c2b8baaSVitaly Wool * be enough slots to hold all possible variants 917c2b8baaSVitaly Wool */ 927c2b8baaSVitaly Wool unsigned long slot[BUDDY_MASK + 1]; 93fc548865SVitaly Wool unsigned long pool; /* back link */ 944a3ac931SVitaly Wool rwlock_t lock; 957c2b8baaSVitaly Wool }; 967c2b8baaSVitaly Wool #define HANDLE_FLAG_MASK (0x03) 977c2b8baaSVitaly Wool 987c2b8baaSVitaly Wool /* 997c2b8baaSVitaly Wool * struct z3fold_header - z3fold page metadata occupying first chunks of each 1007c2b8baaSVitaly Wool * z3fold page, except for HEADLESS pages 1017c2b8baaSVitaly Wool * @buddy: links the z3fold page into the relevant list in the 1027c2b8baaSVitaly Wool * pool 1037c2b8baaSVitaly Wool * @page_lock: per-page lock 1047c2b8baaSVitaly Wool * @refcount: reference count for the z3fold page 1057c2b8baaSVitaly Wool * @work: work_struct for page layout optimization 1067c2b8baaSVitaly Wool * @slots: pointer to the structure holding buddy slots 107bb9a374dSVitaly Wool * @pool: pointer to the containing pool 1087c2b8baaSVitaly Wool * @cpu: CPU which this page "belongs" to 1097c2b8baaSVitaly Wool * @first_chunks: the size of the first buddy in chunks, 0 if free 1107c2b8baaSVitaly Wool * @middle_chunks: the size of the middle buddy in chunks, 0 if free 1117c2b8baaSVitaly Wool * @last_chunks: the size of the last buddy in chunks, 0 if free 1127c2b8baaSVitaly Wool * @first_num: the starting number (for the first handle) 1131f862989SVitaly Wool * @mapped_count: the number of objects currently mapped 1147c2b8baaSVitaly Wool */ 1157c2b8baaSVitaly Wool struct z3fold_header { 1167c2b8baaSVitaly Wool struct list_head buddy; 1177c2b8baaSVitaly Wool spinlock_t page_lock; 1187c2b8baaSVitaly Wool struct kref refcount; 1197c2b8baaSVitaly Wool struct work_struct work; 1207c2b8baaSVitaly Wool struct z3fold_buddy_slots *slots; 121bb9a374dSVitaly Wool struct z3fold_pool *pool; 1227c2b8baaSVitaly Wool short cpu; 1237c2b8baaSVitaly Wool unsigned short first_chunks; 1247c2b8baaSVitaly 
Wool unsigned short middle_chunks; 1257c2b8baaSVitaly Wool unsigned short last_chunks; 1267c2b8baaSVitaly Wool unsigned short start_middle; 1277c2b8baaSVitaly Wool unsigned short first_num:2; 1281f862989SVitaly Wool unsigned short mapped_count:2; 1294a3ac931SVitaly Wool unsigned short foreign_handles:2; 1307c2b8baaSVitaly Wool }; 1319a001fc1SVitaly Wool 1329a001fc1SVitaly Wool /** 1339a001fc1SVitaly Wool * struct z3fold_pool - stores metadata for each z3fold pool 134d30561c5SVitaly Wool * @name: pool name 135d30561c5SVitaly Wool * @lock: protects pool unbuddied/lru lists 136d30561c5SVitaly Wool * @stale_lock: protects pool stale page list 137d30561c5SVitaly Wool * @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2- 138d30561c5SVitaly Wool * buddies; the list each z3fold page is added to depends on 139d30561c5SVitaly Wool * the size of its free region. 1409a001fc1SVitaly Wool * @lru: list tracking the z3fold pages in LRU order by most recently 1419a001fc1SVitaly Wool * added buddy. 142d30561c5SVitaly Wool * @stale: list of pages marked for freeing 1439a001fc1SVitaly Wool * @pages_nr: number of z3fold pages in the pool. 1447c2b8baaSVitaly Wool * @c_handle: cache for z3fold_buddy_slots allocation 1459a001fc1SVitaly Wool * @ops: pointer to a structure of user defined operations specified at 1469a001fc1SVitaly Wool * pool creation time. 147d30561c5SVitaly Wool * @compact_wq: workqueue for page layout background optimization 148d30561c5SVitaly Wool * @release_wq: workqueue for safe page release 149d30561c5SVitaly Wool * @work: work_struct for safe page release 1501f862989SVitaly Wool * @inode: inode for z3fold pseudo filesystem 1519a001fc1SVitaly Wool * 1529a001fc1SVitaly Wool * This structure is allocated at pool creation time and maintains metadata 1539a001fc1SVitaly Wool * pertaining to a particular z3fold pool. 
1549a001fc1SVitaly Wool */ 1559a001fc1SVitaly Wool struct z3fold_pool { 156d30561c5SVitaly Wool const char *name; 1579a001fc1SVitaly Wool spinlock_t lock; 158d30561c5SVitaly Wool spinlock_t stale_lock; 159d30561c5SVitaly Wool struct list_head *unbuddied; 1609a001fc1SVitaly Wool struct list_head lru; 161d30561c5SVitaly Wool struct list_head stale; 16212d59ae6SVitaly Wool atomic64_t pages_nr; 1637c2b8baaSVitaly Wool struct kmem_cache *c_handle; 1649a001fc1SVitaly Wool const struct z3fold_ops *ops; 1659a001fc1SVitaly Wool struct zpool *zpool; 1669a001fc1SVitaly Wool const struct zpool_ops *zpool_ops; 167d30561c5SVitaly Wool struct workqueue_struct *compact_wq; 168d30561c5SVitaly Wool struct workqueue_struct *release_wq; 169d30561c5SVitaly Wool struct work_struct work; 1701f862989SVitaly Wool struct inode *inode; 1719a001fc1SVitaly Wool }; 1729a001fc1SVitaly Wool 1739a001fc1SVitaly Wool /* 1749a001fc1SVitaly Wool * Internal z3fold page flags 1759a001fc1SVitaly Wool */ 1769a001fc1SVitaly Wool enum z3fold_page_flags { 1775a27aa82SVitaly Wool PAGE_HEADLESS = 0, 1789a001fc1SVitaly Wool MIDDLE_CHUNK_MAPPED, 179d30561c5SVitaly Wool NEEDS_COMPACTING, 1806098d7e1SVitaly Wool PAGE_STALE, 181ca0246bbSVitaly Wool PAGE_CLAIMED, /* by either reclaim or free */ 1829a001fc1SVitaly Wool }; 1839a001fc1SVitaly Wool 1844a3ac931SVitaly Wool /* 185dcf5aedbSVitaly Wool * handle flags, go under HANDLE_FLAG_MASK 186dcf5aedbSVitaly Wool */ 187dcf5aedbSVitaly Wool enum z3fold_handle_flags { 188dcf5aedbSVitaly Wool HANDLES_NOFREE = 0, 189dcf5aedbSVitaly Wool }; 190dcf5aedbSVitaly Wool 191dcf5aedbSVitaly Wool /* 1924a3ac931SVitaly Wool * Forward declarations 1934a3ac931SVitaly Wool */ 1944a3ac931SVitaly Wool static struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool); 1954a3ac931SVitaly Wool static void compact_page_work(struct work_struct *w); 1964a3ac931SVitaly Wool 1979a001fc1SVitaly Wool /***************** 1989a001fc1SVitaly Wool * Helpers 1999a001fc1SVitaly Wool 
*****************/ 2009a001fc1SVitaly Wool 2019a001fc1SVitaly Wool /* Converts an allocation size in bytes to size in z3fold chunks */ 2029a001fc1SVitaly Wool static int size_to_chunks(size_t size) 2039a001fc1SVitaly Wool { 2049a001fc1SVitaly Wool return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 2059a001fc1SVitaly Wool } 2069a001fc1SVitaly Wool 2079a001fc1SVitaly Wool #define for_each_unbuddied_list(_iter, _begin) \ 2089a001fc1SVitaly Wool for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) 2099a001fc1SVitaly Wool 210bb9f6f63SVitaly Wool static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool, 211bb9f6f63SVitaly Wool gfp_t gfp) 2127c2b8baaSVitaly Wool { 213f1549cb5SHenry Burns struct z3fold_buddy_slots *slots; 214f1549cb5SHenry Burns 215f94afee9SHui Su slots = kmem_cache_zalloc(pool->c_handle, 216f1549cb5SHenry Burns (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE))); 2177c2b8baaSVitaly Wool 2187c2b8baaSVitaly Wool if (slots) { 219af4798a5SQian Cai /* It will be freed separately in free_handle(). 
*/ 220af4798a5SQian Cai kmemleak_not_leak(slots); 2217c2b8baaSVitaly Wool slots->pool = (unsigned long)pool; 2224a3ac931SVitaly Wool rwlock_init(&slots->lock); 2237c2b8baaSVitaly Wool } 2247c2b8baaSVitaly Wool 2257c2b8baaSVitaly Wool return slots; 2267c2b8baaSVitaly Wool } 2277c2b8baaSVitaly Wool 2287c2b8baaSVitaly Wool static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s) 2297c2b8baaSVitaly Wool { 2307c2b8baaSVitaly Wool return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK); 2317c2b8baaSVitaly Wool } 2327c2b8baaSVitaly Wool 2337c2b8baaSVitaly Wool static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle) 2347c2b8baaSVitaly Wool { 2357c2b8baaSVitaly Wool return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1)); 2367c2b8baaSVitaly Wool } 2377c2b8baaSVitaly Wool 2384a3ac931SVitaly Wool /* Lock a z3fold page */ 2394a3ac931SVitaly Wool static inline void z3fold_page_lock(struct z3fold_header *zhdr) 2404a3ac931SVitaly Wool { 2414a3ac931SVitaly Wool spin_lock(&zhdr->page_lock); 2424a3ac931SVitaly Wool } 2434a3ac931SVitaly Wool 2444a3ac931SVitaly Wool /* Try to lock a z3fold page */ 2454a3ac931SVitaly Wool static inline int z3fold_page_trylock(struct z3fold_header *zhdr) 2464a3ac931SVitaly Wool { 2474a3ac931SVitaly Wool return spin_trylock(&zhdr->page_lock); 2484a3ac931SVitaly Wool } 2494a3ac931SVitaly Wool 2504a3ac931SVitaly Wool /* Unlock a z3fold page */ 2514a3ac931SVitaly Wool static inline void z3fold_page_unlock(struct z3fold_header *zhdr) 2524a3ac931SVitaly Wool { 2534a3ac931SVitaly Wool spin_unlock(&zhdr->page_lock); 2544a3ac931SVitaly Wool } 2554a3ac931SVitaly Wool 256*767cc6c5SMiaohe Lin /* return locked z3fold page if it's not headless */ 257*767cc6c5SMiaohe Lin static inline struct z3fold_header *get_z3fold_header(unsigned long handle) 2584a3ac931SVitaly Wool { 2594a3ac931SVitaly Wool struct z3fold_buddy_slots *slots; 2604a3ac931SVitaly Wool struct z3fold_header *zhdr; 2614a3ac931SVitaly Wool int 
locked = 0; 2624a3ac931SVitaly Wool 2634a3ac931SVitaly Wool if (!(handle & (1 << PAGE_HEADLESS))) { 2644a3ac931SVitaly Wool slots = handle_to_slots(handle); 2654a3ac931SVitaly Wool do { 2664a3ac931SVitaly Wool unsigned long addr; 2674a3ac931SVitaly Wool 2684a3ac931SVitaly Wool read_lock(&slots->lock); 2694a3ac931SVitaly Wool addr = *(unsigned long *)handle; 2704a3ac931SVitaly Wool zhdr = (struct z3fold_header *)(addr & PAGE_MASK); 2714a3ac931SVitaly Wool locked = z3fold_page_trylock(zhdr); 2724a3ac931SVitaly Wool read_unlock(&slots->lock); 2734a3ac931SVitaly Wool if (locked) 2744a3ac931SVitaly Wool break; 2754a3ac931SVitaly Wool cpu_relax(); 276*767cc6c5SMiaohe Lin } while (true); 2774a3ac931SVitaly Wool } else { 2784a3ac931SVitaly Wool zhdr = (struct z3fold_header *)(handle & PAGE_MASK); 2794a3ac931SVitaly Wool } 2804a3ac931SVitaly Wool 2814a3ac931SVitaly Wool return zhdr; 2824a3ac931SVitaly Wool } 2834a3ac931SVitaly Wool 2844a3ac931SVitaly Wool static inline void put_z3fold_header(struct z3fold_header *zhdr) 2854a3ac931SVitaly Wool { 2864a3ac931SVitaly Wool struct page *page = virt_to_page(zhdr); 2874a3ac931SVitaly Wool 2884a3ac931SVitaly Wool if (!test_bit(PAGE_HEADLESS, &page->private)) 2894a3ac931SVitaly Wool z3fold_page_unlock(zhdr); 2904a3ac931SVitaly Wool } 2914a3ac931SVitaly Wool 292fc548865SVitaly Wool static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) 2937c2b8baaSVitaly Wool { 2947c2b8baaSVitaly Wool struct z3fold_buddy_slots *slots; 2957c2b8baaSVitaly Wool int i; 2967c2b8baaSVitaly Wool bool is_free; 2977c2b8baaSVitaly Wool 2987c2b8baaSVitaly Wool if (handle & (1 << PAGE_HEADLESS)) 2997c2b8baaSVitaly Wool return; 3007c2b8baaSVitaly Wool 3014a3ac931SVitaly Wool if (WARN_ON(*(unsigned long *)handle == 0)) 3024a3ac931SVitaly Wool return; 3034a3ac931SVitaly Wool 3047c2b8baaSVitaly Wool slots = handle_to_slots(handle); 3054a3ac931SVitaly Wool write_lock(&slots->lock); 3064a3ac931SVitaly Wool *(unsigned long *)handle = 0; 
307dcf5aedbSVitaly Wool 308dcf5aedbSVitaly Wool if (test_bit(HANDLES_NOFREE, &slots->pool)) { 309dcf5aedbSVitaly Wool write_unlock(&slots->lock); 310dcf5aedbSVitaly Wool return; /* simple case, nothing else to do */ 311dcf5aedbSVitaly Wool } 312dcf5aedbSVitaly Wool 313fc548865SVitaly Wool if (zhdr->slots != slots) 3144a3ac931SVitaly Wool zhdr->foreign_handles--; 315fc548865SVitaly Wool 3167c2b8baaSVitaly Wool is_free = true; 3177c2b8baaSVitaly Wool for (i = 0; i <= BUDDY_MASK; i++) { 3187c2b8baaSVitaly Wool if (slots->slot[i]) { 3197c2b8baaSVitaly Wool is_free = false; 3207c2b8baaSVitaly Wool break; 3217c2b8baaSVitaly Wool } 3227c2b8baaSVitaly Wool } 323d8f117abSUladzislau Rezki write_unlock(&slots->lock); 3247c2b8baaSVitaly Wool 3257c2b8baaSVitaly Wool if (is_free) { 3267c2b8baaSVitaly Wool struct z3fold_pool *pool = slots_to_pool(slots); 3277c2b8baaSVitaly Wool 328fc548865SVitaly Wool if (zhdr->slots == slots) 329fc548865SVitaly Wool zhdr->slots = NULL; 3307c2b8baaSVitaly Wool kmem_cache_free(pool->c_handle, slots); 3317c2b8baaSVitaly Wool } 3327c2b8baaSVitaly Wool } 3337c2b8baaSVitaly Wool 334ea8157abSDavid Howells static int z3fold_init_fs_context(struct fs_context *fc) 3351f862989SVitaly Wool { 336ea8157abSDavid Howells return init_pseudo(fc, Z3FOLD_MAGIC) ? 
0 : -ENOMEM; 3371f862989SVitaly Wool } 3381f862989SVitaly Wool 3391f862989SVitaly Wool static struct file_system_type z3fold_fs = { 3401f862989SVitaly Wool .name = "z3fold", 341ea8157abSDavid Howells .init_fs_context = z3fold_init_fs_context, 3421f862989SVitaly Wool .kill_sb = kill_anon_super, 3431f862989SVitaly Wool }; 3441f862989SVitaly Wool 3451f862989SVitaly Wool static struct vfsmount *z3fold_mnt; 3461f862989SVitaly Wool static int z3fold_mount(void) 3471f862989SVitaly Wool { 3481f862989SVitaly Wool int ret = 0; 3491f862989SVitaly Wool 3501f862989SVitaly Wool z3fold_mnt = kern_mount(&z3fold_fs); 3511f862989SVitaly Wool if (IS_ERR(z3fold_mnt)) 3521f862989SVitaly Wool ret = PTR_ERR(z3fold_mnt); 3531f862989SVitaly Wool 3541f862989SVitaly Wool return ret; 3551f862989SVitaly Wool } 3561f862989SVitaly Wool 3571f862989SVitaly Wool static void z3fold_unmount(void) 3581f862989SVitaly Wool { 3591f862989SVitaly Wool kern_unmount(z3fold_mnt); 3601f862989SVitaly Wool } 3611f862989SVitaly Wool 3621f862989SVitaly Wool static const struct address_space_operations z3fold_aops; 3631f862989SVitaly Wool static int z3fold_register_migration(struct z3fold_pool *pool) 3641f862989SVitaly Wool { 3651f862989SVitaly Wool pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb); 3661f862989SVitaly Wool if (IS_ERR(pool->inode)) { 3671f862989SVitaly Wool pool->inode = NULL; 3681f862989SVitaly Wool return 1; 3691f862989SVitaly Wool } 3701f862989SVitaly Wool 3711f862989SVitaly Wool pool->inode->i_mapping->private_data = pool; 3721f862989SVitaly Wool pool->inode->i_mapping->a_ops = &z3fold_aops; 3731f862989SVitaly Wool return 0; 3741f862989SVitaly Wool } 3751f862989SVitaly Wool 3761f862989SVitaly Wool static void z3fold_unregister_migration(struct z3fold_pool *pool) 3771f862989SVitaly Wool { 3781f862989SVitaly Wool if (pool->inode) 3791f862989SVitaly Wool iput(pool->inode); 3801f862989SVitaly Wool } 3811f862989SVitaly Wool 3829a001fc1SVitaly Wool /* Initializes the z3fold header of a newly 
allocated z3fold page */ 38363398413SVitaly Wool static struct z3fold_header *init_z3fold_page(struct page *page, bool headless, 384bb9f6f63SVitaly Wool struct z3fold_pool *pool, gfp_t gfp) 3859a001fc1SVitaly Wool { 3869a001fc1SVitaly Wool struct z3fold_header *zhdr = page_address(page); 38763398413SVitaly Wool struct z3fold_buddy_slots *slots; 3889a001fc1SVitaly Wool 3899a001fc1SVitaly Wool INIT_LIST_HEAD(&page->lru); 3909a001fc1SVitaly Wool clear_bit(PAGE_HEADLESS, &page->private); 3919a001fc1SVitaly Wool clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 392d30561c5SVitaly Wool clear_bit(NEEDS_COMPACTING, &page->private); 393d30561c5SVitaly Wool clear_bit(PAGE_STALE, &page->private); 394ca0246bbSVitaly Wool clear_bit(PAGE_CLAIMED, &page->private); 39563398413SVitaly Wool if (headless) 39663398413SVitaly Wool return zhdr; 39763398413SVitaly Wool 39863398413SVitaly Wool slots = alloc_slots(pool, gfp); 39963398413SVitaly Wool if (!slots) 40063398413SVitaly Wool return NULL; 4019a001fc1SVitaly Wool 402c457cd96SMiaohe Lin memset(zhdr, 0, sizeof(*zhdr)); 4032f1e5e4dSVitaly Wool spin_lock_init(&zhdr->page_lock); 4045a27aa82SVitaly Wool kref_init(&zhdr->refcount); 405d30561c5SVitaly Wool zhdr->cpu = -1; 4067c2b8baaSVitaly Wool zhdr->slots = slots; 407bb9a374dSVitaly Wool zhdr->pool = pool; 4089a001fc1SVitaly Wool INIT_LIST_HEAD(&zhdr->buddy); 409d30561c5SVitaly Wool INIT_WORK(&zhdr->work, compact_page_work); 4109a001fc1SVitaly Wool return zhdr; 4119a001fc1SVitaly Wool } 4129a001fc1SVitaly Wool 4139a001fc1SVitaly Wool /* Resets the struct page fields and frees the page */ 4141f862989SVitaly Wool static void free_z3fold_page(struct page *page, bool headless) 4159a001fc1SVitaly Wool { 4161f862989SVitaly Wool if (!headless) { 4171f862989SVitaly Wool lock_page(page); 4181f862989SVitaly Wool __ClearPageMovable(page); 4191f862989SVitaly Wool unlock_page(page); 4201f862989SVitaly Wool } 4211f862989SVitaly Wool ClearPagePrivate(page); 4225a27aa82SVitaly Wool __free_page(page); 
4235a27aa82SVitaly Wool } 4245a27aa82SVitaly Wool 4257c2b8baaSVitaly Wool /* Helper function to build the index */ 4267c2b8baaSVitaly Wool static inline int __idx(struct z3fold_header *zhdr, enum buddy bud) 4277c2b8baaSVitaly Wool { 4287c2b8baaSVitaly Wool return (bud + zhdr->first_num) & BUDDY_MASK; 4297c2b8baaSVitaly Wool } 4307c2b8baaSVitaly Wool 4319a001fc1SVitaly Wool /* 4329a001fc1SVitaly Wool * Encodes the handle of a particular buddy within a z3fold page 4339a001fc1SVitaly Wool * Pool lock should be held as this function accesses first_num 4349a001fc1SVitaly Wool */ 4353f9d2b57SVitaly Wool static unsigned long __encode_handle(struct z3fold_header *zhdr, 4363f9d2b57SVitaly Wool struct z3fold_buddy_slots *slots, 4373f9d2b57SVitaly Wool enum buddy bud) 4389a001fc1SVitaly Wool { 4397c2b8baaSVitaly Wool unsigned long h = (unsigned long)zhdr; 4407c2b8baaSVitaly Wool int idx = 0; 4419a001fc1SVitaly Wool 4427c2b8baaSVitaly Wool /* 4437c2b8baaSVitaly Wool * For a headless page, its handle is its pointer with the extra 4447c2b8baaSVitaly Wool * PAGE_HEADLESS bit set 4457c2b8baaSVitaly Wool */ 4467c2b8baaSVitaly Wool if (bud == HEADLESS) 4477c2b8baaSVitaly Wool return h | (1 << PAGE_HEADLESS); 4487c2b8baaSVitaly Wool 4497c2b8baaSVitaly Wool /* otherwise, return pointer to encoded handle */ 4507c2b8baaSVitaly Wool idx = __idx(zhdr, bud); 4517c2b8baaSVitaly Wool h += idx; 452ca0246bbSVitaly Wool if (bud == LAST) 4537c2b8baaSVitaly Wool h |= (zhdr->last_chunks << BUDDY_SHIFT); 4547c2b8baaSVitaly Wool 4554a3ac931SVitaly Wool write_lock(&slots->lock); 4567c2b8baaSVitaly Wool slots->slot[idx] = h; 4574a3ac931SVitaly Wool write_unlock(&slots->lock); 4587c2b8baaSVitaly Wool return (unsigned long)&slots->slot[idx]; 4599a001fc1SVitaly Wool } 4609a001fc1SVitaly Wool 4613f9d2b57SVitaly Wool static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) 4623f9d2b57SVitaly Wool { 4633f9d2b57SVitaly Wool return __encode_handle(zhdr, zhdr->slots, bud); 
4643f9d2b57SVitaly Wool } 4653f9d2b57SVitaly Wool 466ca0246bbSVitaly Wool /* only for LAST bud, returns zero otherwise */ 467ca0246bbSVitaly Wool static unsigned short handle_to_chunks(unsigned long handle) 468ca0246bbSVitaly Wool { 4694a3ac931SVitaly Wool struct z3fold_buddy_slots *slots = handle_to_slots(handle); 4704a3ac931SVitaly Wool unsigned long addr; 4717c2b8baaSVitaly Wool 4724a3ac931SVitaly Wool read_lock(&slots->lock); 4734a3ac931SVitaly Wool addr = *(unsigned long *)handle; 4744a3ac931SVitaly Wool read_unlock(&slots->lock); 4757c2b8baaSVitaly Wool return (addr & ~PAGE_MASK) >> BUDDY_SHIFT; 476ca0246bbSVitaly Wool } 477ca0246bbSVitaly Wool 478f201ebd8Szhong jiang /* 479f201ebd8Szhong jiang * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle 480f201ebd8Szhong jiang * but that doesn't matter. because the masking will result in the 481f201ebd8Szhong jiang * correct buddy number. 482f201ebd8Szhong jiang */ 4839a001fc1SVitaly Wool static enum buddy handle_to_buddy(unsigned long handle) 4849a001fc1SVitaly Wool { 4857c2b8baaSVitaly Wool struct z3fold_header *zhdr; 4864a3ac931SVitaly Wool struct z3fold_buddy_slots *slots = handle_to_slots(handle); 4877c2b8baaSVitaly Wool unsigned long addr; 4887c2b8baaSVitaly Wool 4894a3ac931SVitaly Wool read_lock(&slots->lock); 4907c2b8baaSVitaly Wool WARN_ON(handle & (1 << PAGE_HEADLESS)); 4917c2b8baaSVitaly Wool addr = *(unsigned long *)handle; 4924a3ac931SVitaly Wool read_unlock(&slots->lock); 4937c2b8baaSVitaly Wool zhdr = (struct z3fold_header *)(addr & PAGE_MASK); 4947c2b8baaSVitaly Wool return (addr - zhdr->first_num) & BUDDY_MASK; 4959a001fc1SVitaly Wool } 4969a001fc1SVitaly Wool 4979050cce1SVitaly Wool static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr) 4989050cce1SVitaly Wool { 499bb9a374dSVitaly Wool return zhdr->pool; 5009050cce1SVitaly Wool } 5019050cce1SVitaly Wool 502d30561c5SVitaly Wool static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) 
503d30561c5SVitaly Wool { 504d30561c5SVitaly Wool struct page *page = virt_to_page(zhdr); 5059050cce1SVitaly Wool struct z3fold_pool *pool = zhdr_to_pool(zhdr); 506d30561c5SVitaly Wool 507d30561c5SVitaly Wool WARN_ON(!list_empty(&zhdr->buddy)); 508d30561c5SVitaly Wool set_bit(PAGE_STALE, &page->private); 50935529357SVitaly Wool clear_bit(NEEDS_COMPACTING, &page->private); 510d30561c5SVitaly Wool spin_lock(&pool->lock); 511d30561c5SVitaly Wool if (!list_empty(&page->lru)) 5121f862989SVitaly Wool list_del_init(&page->lru); 513d30561c5SVitaly Wool spin_unlock(&pool->lock); 5144a3ac931SVitaly Wool 515d30561c5SVitaly Wool if (locked) 516d30561c5SVitaly Wool z3fold_page_unlock(zhdr); 5174a3ac931SVitaly Wool 518d30561c5SVitaly Wool spin_lock(&pool->stale_lock); 519d30561c5SVitaly Wool list_add(&zhdr->buddy, &pool->stale); 520d30561c5SVitaly Wool queue_work(pool->release_wq, &pool->work); 521d30561c5SVitaly Wool spin_unlock(&pool->stale_lock); 522d30561c5SVitaly Wool } 523d30561c5SVitaly Wool 52470ad3196SMiaohe Lin static void release_z3fold_page(struct kref *ref) 525d30561c5SVitaly Wool { 526d30561c5SVitaly Wool struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 527d30561c5SVitaly Wool refcount); 528d30561c5SVitaly Wool __release_z3fold_page(zhdr, false); 529d30561c5SVitaly Wool } 530d30561c5SVitaly Wool 531d30561c5SVitaly Wool static void release_z3fold_page_locked(struct kref *ref) 532d30561c5SVitaly Wool { 533d30561c5SVitaly Wool struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 534d30561c5SVitaly Wool refcount); 535d30561c5SVitaly Wool WARN_ON(z3fold_page_trylock(zhdr)); 536d30561c5SVitaly Wool __release_z3fold_page(zhdr, true); 537d30561c5SVitaly Wool } 538d30561c5SVitaly Wool 539d30561c5SVitaly Wool static void release_z3fold_page_locked_list(struct kref *ref) 540d30561c5SVitaly Wool { 541d30561c5SVitaly Wool struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 542d30561c5SVitaly Wool refcount); 5439050cce1SVitaly 
Wool struct z3fold_pool *pool = zhdr_to_pool(zhdr); 5444a3ac931SVitaly Wool 5459050cce1SVitaly Wool spin_lock(&pool->lock); 546d30561c5SVitaly Wool list_del_init(&zhdr->buddy); 5479050cce1SVitaly Wool spin_unlock(&pool->lock); 548d30561c5SVitaly Wool 549d30561c5SVitaly Wool WARN_ON(z3fold_page_trylock(zhdr)); 550d30561c5SVitaly Wool __release_z3fold_page(zhdr, true); 551d30561c5SVitaly Wool } 552d30561c5SVitaly Wool 553d30561c5SVitaly Wool static void free_pages_work(struct work_struct *w) 554d30561c5SVitaly Wool { 555d30561c5SVitaly Wool struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work); 556d30561c5SVitaly Wool 557d30561c5SVitaly Wool spin_lock(&pool->stale_lock); 558d30561c5SVitaly Wool while (!list_empty(&pool->stale)) { 559d30561c5SVitaly Wool struct z3fold_header *zhdr = list_first_entry(&pool->stale, 560d30561c5SVitaly Wool struct z3fold_header, buddy); 561d30561c5SVitaly Wool struct page *page = virt_to_page(zhdr); 562d30561c5SVitaly Wool 563d30561c5SVitaly Wool list_del(&zhdr->buddy); 564d30561c5SVitaly Wool if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) 565d30561c5SVitaly Wool continue; 566d30561c5SVitaly Wool spin_unlock(&pool->stale_lock); 567d30561c5SVitaly Wool cancel_work_sync(&zhdr->work); 5681f862989SVitaly Wool free_z3fold_page(page, false); 569d30561c5SVitaly Wool cond_resched(); 570d30561c5SVitaly Wool spin_lock(&pool->stale_lock); 571d30561c5SVitaly Wool } 572d30561c5SVitaly Wool spin_unlock(&pool->stale_lock); 573d30561c5SVitaly Wool } 574d30561c5SVitaly Wool 5759a001fc1SVitaly Wool /* 5769a001fc1SVitaly Wool * Returns the number of free chunks in a z3fold page. 5779a001fc1SVitaly Wool * NB: can't be used with HEADLESS pages. 
5789a001fc1SVitaly Wool */ 5799a001fc1SVitaly Wool static int num_free_chunks(struct z3fold_header *zhdr) 5809a001fc1SVitaly Wool { 5819a001fc1SVitaly Wool int nfree; 5829a001fc1SVitaly Wool /* 5839a001fc1SVitaly Wool * If there is a middle object, pick up the bigger free space 5849a001fc1SVitaly Wool * either before or after it. Otherwise just subtract the number 5859a001fc1SVitaly Wool * of chunks occupied by the first and the last objects. 5869a001fc1SVitaly Wool */ 5879a001fc1SVitaly Wool if (zhdr->middle_chunks != 0) { 5889a001fc1SVitaly Wool int nfree_before = zhdr->first_chunks ? 589ede93213SVitaly Wool 0 : zhdr->start_middle - ZHDR_CHUNKS; 5909a001fc1SVitaly Wool int nfree_after = zhdr->last_chunks ? 591ede93213SVitaly Wool 0 : TOTAL_CHUNKS - 592ede93213SVitaly Wool (zhdr->start_middle + zhdr->middle_chunks); 5939a001fc1SVitaly Wool nfree = max(nfree_before, nfree_after); 5949a001fc1SVitaly Wool } else 5959a001fc1SVitaly Wool nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; 5969a001fc1SVitaly Wool return nfree; 5979a001fc1SVitaly Wool } 5989a001fc1SVitaly Wool 5999050cce1SVitaly Wool /* Add to the appropriate unbuddied list */ 6009050cce1SVitaly Wool static inline void add_to_unbuddied(struct z3fold_pool *pool, 6019050cce1SVitaly Wool struct z3fold_header *zhdr) 6029050cce1SVitaly Wool { 6039050cce1SVitaly Wool if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || 6049050cce1SVitaly Wool zhdr->middle_chunks == 0) { 605135f97fdSVitaly Wool struct list_head *unbuddied; 6069050cce1SVitaly Wool int freechunks = num_free_chunks(zhdr); 607135f97fdSVitaly Wool 608135f97fdSVitaly Wool migrate_disable(); 609135f97fdSVitaly Wool unbuddied = this_cpu_ptr(pool->unbuddied); 6109050cce1SVitaly Wool spin_lock(&pool->lock); 6119050cce1SVitaly Wool list_add(&zhdr->buddy, &unbuddied[freechunks]); 6129050cce1SVitaly Wool spin_unlock(&pool->lock); 6139050cce1SVitaly Wool zhdr->cpu = smp_processor_id(); 614135f97fdSVitaly Wool migrate_enable(); 6159050cce1SVitaly 
Wool } 6169050cce1SVitaly Wool } 6179050cce1SVitaly Wool 618dcf5aedbSVitaly Wool static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks) 619dcf5aedbSVitaly Wool { 620dcf5aedbSVitaly Wool enum buddy bud = HEADLESS; 621dcf5aedbSVitaly Wool 622dcf5aedbSVitaly Wool if (zhdr->middle_chunks) { 623dcf5aedbSVitaly Wool if (!zhdr->first_chunks && 624dcf5aedbSVitaly Wool chunks <= zhdr->start_middle - ZHDR_CHUNKS) 625dcf5aedbSVitaly Wool bud = FIRST; 626dcf5aedbSVitaly Wool else if (!zhdr->last_chunks) 627dcf5aedbSVitaly Wool bud = LAST; 628dcf5aedbSVitaly Wool } else { 629dcf5aedbSVitaly Wool if (!zhdr->first_chunks) 630dcf5aedbSVitaly Wool bud = FIRST; 631dcf5aedbSVitaly Wool else if (!zhdr->last_chunks) 632dcf5aedbSVitaly Wool bud = LAST; 633dcf5aedbSVitaly Wool else 634dcf5aedbSVitaly Wool bud = MIDDLE; 635dcf5aedbSVitaly Wool } 636dcf5aedbSVitaly Wool 637dcf5aedbSVitaly Wool return bud; 638dcf5aedbSVitaly Wool } 639dcf5aedbSVitaly Wool 640ede93213SVitaly Wool static inline void *mchunk_memmove(struct z3fold_header *zhdr, 641ede93213SVitaly Wool unsigned short dst_chunk) 642ede93213SVitaly Wool { 643ede93213SVitaly Wool void *beg = zhdr; 644ede93213SVitaly Wool return memmove(beg + (dst_chunk << CHUNK_SHIFT), 645ede93213SVitaly Wool beg + (zhdr->start_middle << CHUNK_SHIFT), 646ede93213SVitaly Wool zhdr->middle_chunks << CHUNK_SHIFT); 647ede93213SVitaly Wool } 648ede93213SVitaly Wool 6494a3ac931SVitaly Wool static inline bool buddy_single(struct z3fold_header *zhdr) 6504a3ac931SVitaly Wool { 6514a3ac931SVitaly Wool return !((zhdr->first_chunks && zhdr->middle_chunks) || 6524a3ac931SVitaly Wool (zhdr->first_chunks && zhdr->last_chunks) || 6534a3ac931SVitaly Wool (zhdr->middle_chunks && zhdr->last_chunks)); 6544a3ac931SVitaly Wool } 6554a3ac931SVitaly Wool 6564a3ac931SVitaly Wool static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr) 6574a3ac931SVitaly Wool { 6584a3ac931SVitaly Wool struct z3fold_pool *pool = 
					zhdr_to_pool(zhdr);
	void *p = zhdr;
	unsigned long old_handle = 0;
	size_t sz = 0;
	struct z3fold_header *new_zhdr = NULL;
	int first_idx = __idx(zhdr, FIRST);
	int middle_idx = __idx(zhdr, MIDDLE);
	int last_idx = __idx(zhdr, LAST);
	unsigned short *moved_chunks = NULL;

	/*
	 * No need to protect slots here -- all the slots are "local" and
	 * the page lock is already taken
	 */
	/* locate the one occupied buddy: its data (p, sz) and its handle */
	if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) {
		p += ZHDR_SIZE_ALIGNED;
		sz = zhdr->first_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[first_idx];
		moved_chunks = &zhdr->first_chunks;
	} else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) {
		p += zhdr->start_middle << CHUNK_SHIFT;
		sz = zhdr->middle_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[middle_idx];
		moved_chunks = &zhdr->middle_chunks;
	} else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) {
		p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
		sz = zhdr->last_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[last_idx];
		moved_chunks = &zhdr->last_chunks;
	}

	if (sz > 0) {
		enum buddy new_bud = HEADLESS;
		short chunks = size_to_chunks(sz);
		void *q;

		/* find a (locked) destination page with room for sz bytes */
		new_zhdr = __z3fold_alloc(pool, sz, false);
		if (!new_zhdr)
			return NULL;

		if (WARN_ON(new_zhdr == zhdr))
			goto out_fail;

		new_bud = get_free_buddy(new_zhdr, chunks);
		q = new_zhdr;
		switch (new_bud) {
		case FIRST:
			new_zhdr->first_chunks = chunks;
			q += ZHDR_SIZE_ALIGNED;
			break;
		case MIDDLE:
			new_zhdr->middle_chunks = chunks;
			new_zhdr->start_middle =
				new_zhdr->first_chunks + ZHDR_CHUNKS;
			q += new_zhdr->start_middle << CHUNK_SHIFT;
			break;
		case LAST:
			new_zhdr->last_chunks = chunks;
			q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT);
			break;
		default:
			goto out_fail;
		}
		new_zhdr->foreign_handles++;
		memcpy(q, p, sz);
		/* re-point the old handle at the object's new location */
		write_lock(&zhdr->slots->lock);
		*(unsigned long *)old_handle = (unsigned long)new_zhdr +
			__idx(new_zhdr, new_bud);
		if (new_bud == LAST)
			*(unsigned long *)old_handle |=
					(new_zhdr->last_chunks << BUDDY_SHIFT);
		write_unlock(&zhdr->slots->lock);
		add_to_unbuddied(pool, new_zhdr);
		z3fold_page_unlock(new_zhdr);

		*moved_chunks = 0;
	}

	return new_zhdr;

out_fail:
	if (new_zhdr) {
		if (kref_put(&new_zhdr->refcount, release_z3fold_page_locked))
			atomic64_dec(&pool->pages_nr);
		else {
			add_to_unbuddied(pool, new_zhdr);
			z3fold_page_unlock(new_zhdr);
		}
	}
	return NULL;

}

#define BIG_CHUNK_GAP	3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
		return 0; /* can't move middle chunk, it's used */

	if (unlikely(PageIsolated(page)))
		return 0;

	if (zhdr->middle_chunks == 0)
		return 0; /* nothing to compact */

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* move to the beginning */
		mchunk_memmove(zhdr, ZHDR_CHUNKS);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		zhdr->first_num++;
		return 1;
	}

	/*
	 * moving data is expensive, so let's only do that if
	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
	 */
	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		/* shift the middle object down, next to the first one */
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		/* shift the middle object up, next to the last one */
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}

static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	if (kref_put(&zhdr->refcount,
			release_z3fold_page_locked)) {
		/* we held the last reference: the page has been released */
		atomic64_dec(&pool->pages_nr);
		return;
	}

	if (test_bit(PAGE_STALE, &page->private) ||
	    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
		/* page is stale or already claimed by someone else */
		z3fold_page_unlock(zhdr);
		return;
	}

	if (!zhdr->foreign_handles && buddy_single(zhdr) &&
	    zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) {
		/* the lone buddy was migrated away; drop our reference */
		if (kref_put(&zhdr->refcount, release_z3fold_page_locked))
			atomic64_dec(&pool->pages_nr);
		else {
			clear_bit(PAGE_CLAIMED, &page->private);
			z3fold_page_unlock(zhdr);
		}
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}

/* Workqueue handler: compact the page whose header embeds this work. */
static void compact_page_work(struct work_struct *w)
{
	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
						work);

	do_compact_page(zhdr, false);
}

/* returns _locked_ z3fold page header or NULL */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	migrate_disable();
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = this_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		zhdr = list_first_entry_or_null(READ_ONCE(l),
				struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		l = &unbuddied[i];
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
				struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			/* lost the race for this entry: restart the lookup */
			spin_unlock(&pool->lock);
			zhdr = NULL;
			migrate_enable();
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		if (test_bit(NEEDS_COMPACTING, &page->private) ||
		    test_bit(PAGE_CLAIMED, &page->private)) {
			/* page is busy being compacted or reclaimed */
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			migrate_enable();
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	migrate_enable();

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
						struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private) ||
			    test_bit(PAGE_CLAIMED, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	/* lazily allocate the handle slots on first use of this page */
	if (zhdr && !zhdr->slots)
		zhdr->slots = alloc_slots(pool,
					can_sleep ? GFP_NOIO : GFP_ATOMIC);
	return zhdr;
}

/*
 * API Functions
 */

/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 * @ops:	user-defined operations for the z3fold pool
 *
 * Return: pointer to the new z3fold pool or NULL if the metadata allocation
 * failed.
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
		const struct z3fold_ops *ops)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head) * NCHUNKS,
					 __alignof__(struct list_head));
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->lru);
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	if (z3fold_register_migration(pool))
		goto out_rwq;
	INIT_WORK(&pool->work, free_pages_work);
	pool->ops = ops;
	return pool;

	/* unwind in reverse order of acquisition */
out_rwq:
	destroy_workqueue(pool->release_wq);
out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}

/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool:	the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);

	/*
	 * We need to destroy pool->compact_wq before pool->release_wq,
	 * as any pending work on pool->compact_wq will call
	 * queue_work(pool->release_wq, &pool->work).
	 *
	 * There are still outstanding pages until both workqueues are drained,
	 * so we cannot unregister migration until then.
	 */

	destroy_workqueue(pool->compact_wq);
	destroy_workqueue(pool->release_wq);
	z3fold_unregister_migration(pool);
	kfree(pool);
}

/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request. A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
 * as z3fold pool pages.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
 * a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size)
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		/* object too big to share a page: use the whole page */
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			bud = get_free_buddy(zhdr, chunks);
			if (bud == HEADLESS) {
				/*
				 * __z3fold_alloc() returned a page that in
				 * fact cannot fit the object: drop it and
				 * try again.
				 */
				if (kref_put(&zhdr->refcount,
					     release_z3fold_page_locked))
					atomic64_dec(&pool->pages_nr);
				else
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = NULL;
	if (can_sleep) {
		spin_lock(&pool->stale_lock);
		zhdr = list_first_entry_or_null(&pool->stale,
						struct z3fold_header, buddy);
		/*
		 * Before allocating a page, let's see if we can take one from
		 * the stale pages list. cancel_work_sync() can sleep so we
		 * limit this case to the contexts where we can sleep
		 */
		if (zhdr) {
			list_del(&zhdr->buddy);
			spin_unlock(&pool->stale_lock);
			cancel_work_sync(&zhdr->work);
			page = virt_to_page(zhdr);
		} else {
			spin_unlock(&pool->stale_lock);
		}
	}
	if (!page)
		page = alloc_page(gfp);

	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	/* mark the page movable; lock_page() may sleep, so only when allowed */
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, pool->inode->i_mapping);
		unlock_page(page);
	} else {
		if (trylock_page(page)) {
			__SetPageMovable(page, pool->inode->i_mapping);
			unlock_page(page);
		}
	}
	z3fold_page_lock(zhdr);

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	/* Add/move z3fold page to beginning of LRU */
	if (!list_empty(&page->lru))
		list_del(&page->lru);

	list_add(&page->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}

/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is under
 * reclaim, as indicated by the PG_reclaim flag being set, this function
 * only sets the first|last_chunks
 * to 0. The page is actually freed
 * once both buddies are evicted (see z3fold_reclaim_page() below).
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;
	bool page_claimed;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);
	/* page_claimed != 0 means someone else (e.g. reclaim) owns the page */
	page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!page_claimed) {
			spin_lock(&pool->lock);
			list_del(&page->lru);
			spin_unlock(&pool->lock);
			put_z3fold_header(zhdr);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	bud = handle_to_buddy(handle);

	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		put_z3fold_header(zhdr);
		return;
	}

	if (!page_claimed)
		free_handle(handle, zhdr);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
		/* last reference dropped: the page has been released */
		atomic64_dec(&pool->pages_nr);
		return;
	}
	if (page_claimed) {
		/* the page has not been claimed by us */
		z3fold_page_unlock(zhdr);
		return;
	}
	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		/* compaction already pending for this page */
		put_z3fold_header(zhdr);
		clear_bit(PAGE_CLAIMED, &page->private);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		/* no valid CPU to queue on: compact synchronously */
		spin_lock(&pool->lock);
		list_del_init(&zhdr->buddy);
		spin_unlock(&pool->lock);
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		clear_bit(PAGE_CLAIMED, &page->private);
		do_compact_page(zhdr, true);
		return;
	}
	/* hand the page to the compaction workqueue on its home CPU */
	kref_get(&zhdr->refcount);
	clear_bit(PAGE_CLAIMED, &page->private);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	put_z3fold_header(zhdr);
}

/**
 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool:	pool from which a page will attempt to be evicted
 * @retries:	number of pages on the LRU list for which eviction will
 *		be attempted before failing
 *
 * z3fold reclaim is different from normal system reclaim in that it is done
 * from the bottom, up. This is because only the bottom layer, z3fold, has
 * information on how the allocations are organized within each z3fold page.
 * This has the potential to create interesting locking situations between
 * z3fold and the user, however.
12859a001fc1SVitaly Wool * 12869a001fc1SVitaly Wool * To avoid these, this is how z3fold_reclaim_page() should be called: 1287f144c390SMike Rapoport * 12889a001fc1SVitaly Wool * The user detects a page should be reclaimed and calls z3fold_reclaim_page(). 12899a001fc1SVitaly Wool * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and 12909a001fc1SVitaly Wool * call the user-defined eviction handler with the pool and handle as 12919a001fc1SVitaly Wool * arguments. 12929a001fc1SVitaly Wool * 12939a001fc1SVitaly Wool * If the handle can not be evicted, the eviction handler should return 12949a001fc1SVitaly Wool * non-zero. z3fold_reclaim_page() will add the z3fold page back to the 12959a001fc1SVitaly Wool * appropriate list and try the next z3fold page on the LRU up to 12969a001fc1SVitaly Wool * a user defined number of retries. 12979a001fc1SVitaly Wool * 12989a001fc1SVitaly Wool * If the handle is successfully evicted, the eviction handler should 12999a001fc1SVitaly Wool * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free() 13009a001fc1SVitaly Wool * contains logic to delay freeing the page if the page is under reclaim, 13019a001fc1SVitaly Wool * as indicated by the setting of the PG_reclaim flag on the underlying page. 13029a001fc1SVitaly Wool * 13039a001fc1SVitaly Wool * If all buddies in the z3fold page are successfully evicted, then the 13049a001fc1SVitaly Wool * z3fold page can be freed. 13059a001fc1SVitaly Wool * 13069a001fc1SVitaly Wool * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are 13079a001fc1SVitaly Wool * no pages to evict or an eviction handler is not registered, -EAGAIN if 13089a001fc1SVitaly Wool * the retry limit was hit. 
 */
static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
{
	int i, ret = -1;
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	struct list_head *pos;
	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
	/*
	 * Local, stack-allocated slots: handles are encoded into these so
	 * they stay valid for the eviction callbacks even if z3fold_free()
	 * zeroes out zhdr->slots concurrently (see comment below).
	 */
	struct z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN)));

	rwlock_init(&slots.lock);
	slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE);

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		/* walk the LRU from the coldest (tail) end */
		list_for_each_prev(pos, &pool->lru) {
			page = list_entry(pos, struct page, lru);

			zhdr = page_address(page);
			if (test_bit(PAGE_HEADLESS, &page->private)) {
				/*
				 * For non-headless pages, we wait to do this
				 * until we have the page lock to avoid racing
				 * with __z3fold_alloc(). Headless pages don't
				 * have a lock (and __z3fold_alloc() will never
				 * see them), but we still need to test and set
				 * PAGE_CLAIMED to avoid racing with
				 * z3fold_free(), so just do it now before
				 * leaving the loop.
				 */
				if (test_and_set_bit(PAGE_CLAIMED, &page->private))
					continue;

				break;
			}

			/* the page may already be on its way to being freed */
			if (kref_get_unless_zero(&zhdr->refcount) == 0) {
				zhdr = NULL;
				break;
			}
			if (!z3fold_page_trylock(zhdr)) {
				/* drop the reference we just took */
				if (kref_put(&zhdr->refcount,
						release_z3fold_page))
					atomic64_dec(&pool->pages_nr);
				zhdr = NULL;
				continue; /* can't evict at this point */
			}

			/* test_and_set_bit is of course atomic, but we still
			 * need to do it under page lock, otherwise checking
			 * that bit in __z3fold_alloc wouldn't make sense
			 */
			if (zhdr->foreign_handles ||
			    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
				if (kref_put(&zhdr->refcount,
						release_z3fold_page))
					atomic64_dec(&pool->pages_nr);
				else
					z3fold_page_unlock(zhdr);
				zhdr = NULL;
				continue; /* can't evict such page */
			}
			/* detach from the buddy list before evicting */
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			break;
		}

		if (!zhdr)
			break;

		/* we own the page now: take it off the LRU and drop the lock */
		list_del_init(&page->lru);
		spin_unlock(&pool->lock);

		if (!test_bit(PAGE_HEADLESS, &page->private)) {
			/*
			 * We need to encode the handles before unlocking, and
			 * use our local slots structure because z3fold_free
			 * can zero out zhdr->slots and we can't do much
			 * about that
			 */
			first_handle = 0;
			last_handle = 0;
			middle_handle = 0;
			memset(slots.slot, 0, sizeof(slots.slot));
			if (zhdr->first_chunks)
				first_handle = __encode_handle(zhdr, &slots,
								FIRST);
			if (zhdr->middle_chunks)
				middle_handle = __encode_handle(zhdr, &slots,
								MIDDLE);
			if (zhdr->last_chunks)
				last_handle = __encode_handle(zhdr, &slots,
								LAST);
			/*
			 * it's safe to unlock here because we hold a
			 * reference to this page
			 */
			z3fold_page_unlock(zhdr);
		} else {
			first_handle = encode_handle(zhdr, HEADLESS);
			last_handle = middle_handle = 0;
		}
		/* Issue the eviction callback(s) */
		if (middle_handle) {
			ret = pool->ops->evict(pool, middle_handle);
			if (ret)
				goto next;
		}
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
		}
next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				/* evicted successfully: the page can go away */
				free_z3fold_page(page, true);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/* eviction failed: give the page back to the LRU */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			clear_bit(PAGE_CLAIMED, &page->private);
		} else {
			/*
			 * NOTE(review): zhdr->slots is captured before the
			 * kref_put because the release callback may tear the
			 * header down; the slots are then freed manually here
			 * (our local slots were marked HANDLES_NOFREE) —
			 * confirm release_z3fold_page_locked leaves them.
			 */
			struct z3fold_buddy_slots *slots = zhdr->slots;
			z3fold_page_lock(zhdr);
			if (kref_put(&zhdr->refcount,
					release_z3fold_page_locked)) {
				kmem_cache_free(pool->c_handle, slots);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/*
			 * if we are here, the page is still not completely
			 * free. Take the global pool lock then to be able
			 * to add it back to the lru list
			 */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			z3fold_page_unlock(zhdr);
			clear_bit(PAGE_CLAIMED, &page->private);
		}

		/* We started off locked so we need to lock the pool back */
		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}

/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from handle and constructs the pointer to the
 * correct starting chunk within the page.
14819a001fc1SVitaly Wool * 14829a001fc1SVitaly Wool * Returns: a pointer to the mapped allocation 14839a001fc1SVitaly Wool */ 14849a001fc1SVitaly Wool static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) 14859a001fc1SVitaly Wool { 14869a001fc1SVitaly Wool struct z3fold_header *zhdr; 14879a001fc1SVitaly Wool struct page *page; 14889a001fc1SVitaly Wool void *addr; 14899a001fc1SVitaly Wool enum buddy buddy; 14909a001fc1SVitaly Wool 14914a3ac931SVitaly Wool zhdr = get_z3fold_header(handle); 14929a001fc1SVitaly Wool addr = zhdr; 14939a001fc1SVitaly Wool page = virt_to_page(zhdr); 14949a001fc1SVitaly Wool 14959a001fc1SVitaly Wool if (test_bit(PAGE_HEADLESS, &page->private)) 14969a001fc1SVitaly Wool goto out; 14979a001fc1SVitaly Wool 14989a001fc1SVitaly Wool buddy = handle_to_buddy(handle); 14999a001fc1SVitaly Wool switch (buddy) { 15009a001fc1SVitaly Wool case FIRST: 15019a001fc1SVitaly Wool addr += ZHDR_SIZE_ALIGNED; 15029a001fc1SVitaly Wool break; 15039a001fc1SVitaly Wool case MIDDLE: 15049a001fc1SVitaly Wool addr += zhdr->start_middle << CHUNK_SHIFT; 15059a001fc1SVitaly Wool set_bit(MIDDLE_CHUNK_MAPPED, &page->private); 15069a001fc1SVitaly Wool break; 15079a001fc1SVitaly Wool case LAST: 1508ca0246bbSVitaly Wool addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); 15099a001fc1SVitaly Wool break; 15109a001fc1SVitaly Wool default: 15119a001fc1SVitaly Wool pr_err("unknown buddy id %d\n", buddy); 15129a001fc1SVitaly Wool WARN_ON(1); 15139a001fc1SVitaly Wool addr = NULL; 15149a001fc1SVitaly Wool break; 15159a001fc1SVitaly Wool } 15162f1e5e4dSVitaly Wool 15171f862989SVitaly Wool if (addr) 15181f862989SVitaly Wool zhdr->mapped_count++; 15199a001fc1SVitaly Wool out: 15204a3ac931SVitaly Wool put_z3fold_header(zhdr); 15219a001fc1SVitaly Wool return addr; 15229a001fc1SVitaly Wool } 15239a001fc1SVitaly Wool 15249a001fc1SVitaly Wool /** 15259a001fc1SVitaly Wool * z3fold_unmap() - unmaps the allocation associated with the given handle 15269a001fc1SVitaly 
Wool * @pool: pool in which the allocation resides 15279a001fc1SVitaly Wool * @handle: handle associated with the allocation to be unmapped 15289a001fc1SVitaly Wool */ 15299a001fc1SVitaly Wool static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle) 15309a001fc1SVitaly Wool { 15319a001fc1SVitaly Wool struct z3fold_header *zhdr; 15329a001fc1SVitaly Wool struct page *page; 15339a001fc1SVitaly Wool enum buddy buddy; 15349a001fc1SVitaly Wool 15354a3ac931SVitaly Wool zhdr = get_z3fold_header(handle); 15369a001fc1SVitaly Wool page = virt_to_page(zhdr); 15379a001fc1SVitaly Wool 15382f1e5e4dSVitaly Wool if (test_bit(PAGE_HEADLESS, &page->private)) 15399a001fc1SVitaly Wool return; 15409a001fc1SVitaly Wool 15419a001fc1SVitaly Wool buddy = handle_to_buddy(handle); 15429a001fc1SVitaly Wool if (buddy == MIDDLE) 15439a001fc1SVitaly Wool clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 15441f862989SVitaly Wool zhdr->mapped_count--; 15454a3ac931SVitaly Wool put_z3fold_header(zhdr); 15469a001fc1SVitaly Wool } 15479a001fc1SVitaly Wool 15489a001fc1SVitaly Wool /** 15499a001fc1SVitaly Wool * z3fold_get_pool_size() - gets the z3fold pool size in pages 15509a001fc1SVitaly Wool * @pool: pool whose size is being queried 15519a001fc1SVitaly Wool * 155212d59ae6SVitaly Wool * Returns: size in pages of the given pool. 
 */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}

/*
 * Page-migration callback: decide whether @page may be isolated for
 * migration. Headless, compaction-pending, stale, mapped or
 * foreign-handled pages are refused; otherwise the page is claimed,
 * taken off the buddy and LRU lists and an extra reference is held.
 */
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	/* objects are currently mapped or referenced from outside */
	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
		goto out;

	if (test_and_set_bit(PAGE_CLAIMED, &page->private))
		goto out;
	pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);

	/* hold a reference for the duration of the migration */
	kref_get(&zhdr->refcount);
	z3fold_page_unlock(zhdr);
	return true;

out:
	z3fold_page_unlock(zhdr);
	return false;
}

/*
 * Page-migration callback: copy the contents and state of @page into
 * @newpage, re-initialize the new header, re-encode the handles, queue
 * compaction work for the new page and release the old one.
 */
static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
			       struct page *page, enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;
	struct address_space *new_mapping;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr))
		return -EAGAIN;
	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
		z3fold_page_unlock(zhdr);
		clear_bit(PAGE_CLAIMED, &page->private);
		return -EBUSY;
	}
	/*
	 * NOTE(review): unlike the -EBUSY path above, PAGE_CLAIMED is
	 * left set on the -EAGAIN paths — presumably because migration
	 * will be retried on the still-isolated page; confirm.
	 */
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	/* copy the whole page, header included */
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	page->private = 0;
	z3fold_page_unlock(zhdr);
	/* the copied lock/work state is stale, re-initialize both */
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	new_mapping = page_mapping(page);
	__ClearPageMovable(page);
	ClearPagePrivate(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	/* re-encode the handles so they point into the new page */
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	spin_lock(&pool->lock);
	list_add(&newpage->lru, &pool->lru);
	spin_unlock(&pool->lock);
	__SetPageMovable(newpage, new_mapping);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	page_mapcount_reset(page);
	clear_bit(PAGE_CLAIMED, &page->private);
	put_page(page);
	return 0;
}

/*
 * Page-migration callback: return a previously isolated page to the
 * pool's LRU, dropping the reference taken in z3fold_page_isolate().
 */
static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	/* drop the isolation reference; this may free the page */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	spin_unlock(&pool->lock);
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}

static const struct address_space_operations z3fold_aops = {
	.isolate_page = z3fold_page_isolate,
	.migratepage = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};

/*****************
 * zpool
 ****************/

/* forward an eviction request to the zpool user's callback, if any */
static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
{
	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
		return pool->zpool_ops->evict(pool->zpool, handle);
	else
		return -ENOENT;
}

static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};

static void *z3fold_zpool_create(const char *name, gfp_t gfp,
			       const struct zpool_ops *zpool_ops,
			       struct zpool *zpool)
{
	struct z3fold_pool *pool;

	/* only hook up eviction if the zpool user provided callbacks */
	pool = z3fold_create_pool(name, gfp,
			zpool_ops ? &z3fold_zpool_ops : NULL);
	if (pool) {
		pool->zpool = zpool;
		pool->zpool_ops = zpool_ops;
	}
	return pool;
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}

static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}

/*
 * Reclaim up to @pages pages, stopping at the first failure; the number
 * actually reclaimed is stored in *@reclaimed (if non-NULL). Returns the
 * result of the last z3fold_reclaim_page() call (each attempt allows up
 * to 8 LRU retries).
 */
static int z3fold_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	unsigned int total = 0;
	int ret = -EINVAL;

	while (total < pages) {
		ret = z3fold_reclaim_page(pool, 8);
		if (ret < 0)
			break;
		total++;
	}

	if (reclaimed)
		*reclaimed = total;

	return ret;
}

/* @mm is ignored: z3fold mappings are read/write by construction */
static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}

static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}

static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.sleep_mapped = true,
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.shrink =	z3fold_zpool_shrink,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_size =	z3fold_zpool_total_size,
};

MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
	int ret;

	/*
	 * Make sure the z3fold header is not larger than the page size and
	 * there is remaining space for its buddy.
	 */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE);
	ret = z3fold_mount();
	if (ret)
		return ret;

	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}

static void __exit exit_z3fold(void)
{
	z3fold_unmount();
	zpool_unregister_driver(&z3fold_zpool_driver);
}

module_init(init_z3fold);
module_exit(exit_z3fold);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");