xref: /linux/mm/z3fold.c (revision 767cc6c5568afa50ef6abbd4efb61beee56f9cc8)
109c434b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
29a001fc1SVitaly Wool /*
39a001fc1SVitaly Wool  * z3fold.c
49a001fc1SVitaly Wool  *
59a001fc1SVitaly Wool  * Author: Vitaly Wool <vitaly.wool@konsulko.com>
69a001fc1SVitaly Wool  * Copyright (C) 2016, Sony Mobile Communications Inc.
79a001fc1SVitaly Wool  *
89a001fc1SVitaly Wool  * This implementation is based on zbud written by Seth Jennings.
99a001fc1SVitaly Wool  *
109a001fc1SVitaly Wool  * z3fold is an special purpose allocator for storing compressed pages. It
119a001fc1SVitaly Wool  * can store up to three compressed pages per page which improves the
129a001fc1SVitaly Wool  * compression ratio of zbud while retaining its main concepts (e. g. always
139a001fc1SVitaly Wool  * storing an integral number of objects per page) and simplicity.
149a001fc1SVitaly Wool  * It still has simple and deterministic reclaim properties that make it
159a001fc1SVitaly Wool  * preferable to a higher density approach (with no requirement on integral
169a001fc1SVitaly Wool  * number of object per page) when reclaim is used.
179a001fc1SVitaly Wool  *
189a001fc1SVitaly Wool  * As in zbud, pages are divided into "chunks".  The size of the chunks is
199a001fc1SVitaly Wool  * fixed at compile time and is determined by NCHUNKS_ORDER below.
209a001fc1SVitaly Wool  *
219a001fc1SVitaly Wool  * z3fold doesn't export any API and is meant to be used via zpool API.
229a001fc1SVitaly Wool  */
239a001fc1SVitaly Wool 
249a001fc1SVitaly Wool #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
259a001fc1SVitaly Wool 
269a001fc1SVitaly Wool #include <linux/atomic.h>
27d30561c5SVitaly Wool #include <linux/sched.h>
281f862989SVitaly Wool #include <linux/cpumask.h>
299a001fc1SVitaly Wool #include <linux/list.h>
309a001fc1SVitaly Wool #include <linux/mm.h>
319a001fc1SVitaly Wool #include <linux/module.h>
321f862989SVitaly Wool #include <linux/page-flags.h>
331f862989SVitaly Wool #include <linux/migrate.h>
341f862989SVitaly Wool #include <linux/node.h>
351f862989SVitaly Wool #include <linux/compaction.h>
36d30561c5SVitaly Wool #include <linux/percpu.h>
371f862989SVitaly Wool #include <linux/mount.h>
38ea8157abSDavid Howells #include <linux/pseudo_fs.h>
391f862989SVitaly Wool #include <linux/fs.h>
409a001fc1SVitaly Wool #include <linux/preempt.h>
41d30561c5SVitaly Wool #include <linux/workqueue.h>
429a001fc1SVitaly Wool #include <linux/slab.h>
439a001fc1SVitaly Wool #include <linux/spinlock.h>
449a001fc1SVitaly Wool #include <linux/zpool.h>
45ea8157abSDavid Howells #include <linux/magic.h>
46af4798a5SQian Cai #include <linux/kmemleak.h>
479a001fc1SVitaly Wool 
489a001fc1SVitaly Wool /*
499a001fc1SVitaly Wool  * NCHUNKS_ORDER determines the internal allocation granularity, effectively
509a001fc1SVitaly Wool  * adjusting internal fragmentation.  It also determines the number of
519a001fc1SVitaly Wool  * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
52ede93213SVitaly Wool  * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
53ede93213SVitaly Wool  * in the beginning of an allocated page are occupied by z3fold header, so
54ede93213SVitaly Wool  * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
55ede93213SVitaly Wool  * which shows the max number of free chunks in z3fold page, also there will
56ede93213SVitaly Wool  * be 63, or 62, respectively, freelists per pool.
579a001fc1SVitaly Wool  */
589a001fc1SVitaly Wool #define NCHUNKS_ORDER	6
599a001fc1SVitaly Wool 
609a001fc1SVitaly Wool #define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
619a001fc1SVitaly Wool #define CHUNK_SIZE	(1 << CHUNK_SHIFT)
62ede93213SVitaly Wool #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
63ede93213SVitaly Wool #define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
64ede93213SVitaly Wool #define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
65e3c0db4fSMiaohe Lin #define NCHUNKS		(TOTAL_CHUNKS - ZHDR_CHUNKS)
669a001fc1SVitaly Wool 
67f201ebd8Szhong jiang #define BUDDY_MASK	(0x3)
68ca0246bbSVitaly Wool #define BUDDY_SHIFT	2
697c2b8baaSVitaly Wool #define SLOTS_ALIGN	(0x40)
707c2b8baaSVitaly Wool 
717c2b8baaSVitaly Wool /*****************
727c2b8baaSVitaly Wool  * Structures
737c2b8baaSVitaly Wool *****************/
747c2b8baaSVitaly Wool struct z3fold_pool;
757c2b8baaSVitaly Wool struct z3fold_ops {
767c2b8baaSVitaly Wool 	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
777c2b8baaSVitaly Wool };
787c2b8baaSVitaly Wool 
797c2b8baaSVitaly Wool enum buddy {
807c2b8baaSVitaly Wool 	HEADLESS = 0,
817c2b8baaSVitaly Wool 	FIRST,
827c2b8baaSVitaly Wool 	MIDDLE,
837c2b8baaSVitaly Wool 	LAST,
847c2b8baaSVitaly Wool 	BUDDIES_MAX = LAST
857c2b8baaSVitaly Wool };
867c2b8baaSVitaly Wool 
877c2b8baaSVitaly Wool struct z3fold_buddy_slots {
887c2b8baaSVitaly Wool 	/*
897c2b8baaSVitaly Wool 	 * we are using BUDDY_MASK in handle_to_buddy etc. so there should
907c2b8baaSVitaly Wool 	 * be enough slots to hold all possible variants
917c2b8baaSVitaly Wool 	 */
927c2b8baaSVitaly Wool 	unsigned long slot[BUDDY_MASK + 1];
93fc548865SVitaly Wool 	unsigned long pool; /* back link */
944a3ac931SVitaly Wool 	rwlock_t lock;
957c2b8baaSVitaly Wool };
967c2b8baaSVitaly Wool #define HANDLE_FLAG_MASK	(0x03)
977c2b8baaSVitaly Wool 
987c2b8baaSVitaly Wool /*
997c2b8baaSVitaly Wool  * struct z3fold_header - z3fold page metadata occupying first chunks of each
1007c2b8baaSVitaly Wool  *			z3fold page, except for HEADLESS pages
1017c2b8baaSVitaly Wool  * @buddy:		links the z3fold page into the relevant list in the
1027c2b8baaSVitaly Wool  *			pool
1037c2b8baaSVitaly Wool  * @page_lock:		per-page lock
1047c2b8baaSVitaly Wool  * @refcount:		reference count for the z3fold page
1057c2b8baaSVitaly Wool  * @work:		work_struct for page layout optimization
1067c2b8baaSVitaly Wool  * @slots:		pointer to the structure holding buddy slots
107bb9a374dSVitaly Wool  * @pool:		pointer to the containing pool
1087c2b8baaSVitaly Wool  * @cpu:		CPU which this page "belongs" to
1097c2b8baaSVitaly Wool  * @first_chunks:	the size of the first buddy in chunks, 0 if free
1107c2b8baaSVitaly Wool  * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
1117c2b8baaSVitaly Wool  * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	the starting chunk of the middle buddy
 * @first_num:		the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 * @foreign_handles:	number of foreign handles to this page
 */
1157c2b8baaSVitaly Wool struct z3fold_header {
1167c2b8baaSVitaly Wool 	struct list_head buddy;
1177c2b8baaSVitaly Wool 	spinlock_t page_lock;
1187c2b8baaSVitaly Wool 	struct kref refcount;
1197c2b8baaSVitaly Wool 	struct work_struct work;
1207c2b8baaSVitaly Wool 	struct z3fold_buddy_slots *slots;
121bb9a374dSVitaly Wool 	struct z3fold_pool *pool;
1227c2b8baaSVitaly Wool 	short cpu;
1237c2b8baaSVitaly Wool 	unsigned short first_chunks;
1247c2b8baaSVitaly Wool 	unsigned short middle_chunks;
1257c2b8baaSVitaly Wool 	unsigned short last_chunks;
1267c2b8baaSVitaly Wool 	unsigned short start_middle;
1277c2b8baaSVitaly Wool 	unsigned short first_num:2;
1281f862989SVitaly Wool 	unsigned short mapped_count:2;
1294a3ac931SVitaly Wool 	unsigned short foreign_handles:2;
1307c2b8baaSVitaly Wool };
1319a001fc1SVitaly Wool 
1329a001fc1SVitaly Wool /**
1339a001fc1SVitaly Wool  * struct z3fold_pool - stores metadata for each z3fold pool
134d30561c5SVitaly Wool  * @name:	pool name
135d30561c5SVitaly Wool  * @lock:	protects pool unbuddied/lru lists
136d30561c5SVitaly Wool  * @stale_lock:	protects pool stale page list
137d30561c5SVitaly Wool  * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
138d30561c5SVitaly Wool  *		buddies; the list each z3fold page is added to depends on
139d30561c5SVitaly Wool  *		the size of its free region.
1409a001fc1SVitaly Wool  * @lru:	list tracking the z3fold pages in LRU order by most recently
1419a001fc1SVitaly Wool  *		added buddy.
142d30561c5SVitaly Wool  * @stale:	list of pages marked for freeing
1439a001fc1SVitaly Wool  * @pages_nr:	number of z3fold pages in the pool.
1447c2b8baaSVitaly Wool  * @c_handle:	cache for z3fold_buddy_slots allocation
 * @ops:	pointer to a structure of user defined operations specified at
 *		pool creation time.
 * @zpool:	zpool driving this pool
 * @zpool_ops:	zpool operations structure with an evict callback
147d30561c5SVitaly Wool  * @compact_wq:	workqueue for page layout background optimization
148d30561c5SVitaly Wool  * @release_wq:	workqueue for safe page release
149d30561c5SVitaly Wool  * @work:	work_struct for safe page release
1501f862989SVitaly Wool  * @inode:	inode for z3fold pseudo filesystem
1519a001fc1SVitaly Wool  *
1529a001fc1SVitaly Wool  * This structure is allocated at pool creation time and maintains metadata
1539a001fc1SVitaly Wool  * pertaining to a particular z3fold pool.
1549a001fc1SVitaly Wool  */
1559a001fc1SVitaly Wool struct z3fold_pool {
156d30561c5SVitaly Wool 	const char *name;
1579a001fc1SVitaly Wool 	spinlock_t lock;
158d30561c5SVitaly Wool 	spinlock_t stale_lock;
159d30561c5SVitaly Wool 	struct list_head *unbuddied;
1609a001fc1SVitaly Wool 	struct list_head lru;
161d30561c5SVitaly Wool 	struct list_head stale;
16212d59ae6SVitaly Wool 	atomic64_t pages_nr;
1637c2b8baaSVitaly Wool 	struct kmem_cache *c_handle;
1649a001fc1SVitaly Wool 	const struct z3fold_ops *ops;
1659a001fc1SVitaly Wool 	struct zpool *zpool;
1669a001fc1SVitaly Wool 	const struct zpool_ops *zpool_ops;
167d30561c5SVitaly Wool 	struct workqueue_struct *compact_wq;
168d30561c5SVitaly Wool 	struct workqueue_struct *release_wq;
169d30561c5SVitaly Wool 	struct work_struct work;
1701f862989SVitaly Wool 	struct inode *inode;
1719a001fc1SVitaly Wool };
1729a001fc1SVitaly Wool 
1739a001fc1SVitaly Wool /*
1749a001fc1SVitaly Wool  * Internal z3fold page flags
1759a001fc1SVitaly Wool  */
1769a001fc1SVitaly Wool enum z3fold_page_flags {
1775a27aa82SVitaly Wool 	PAGE_HEADLESS = 0,
1789a001fc1SVitaly Wool 	MIDDLE_CHUNK_MAPPED,
179d30561c5SVitaly Wool 	NEEDS_COMPACTING,
1806098d7e1SVitaly Wool 	PAGE_STALE,
181ca0246bbSVitaly Wool 	PAGE_CLAIMED, /* by either reclaim or free */
1829a001fc1SVitaly Wool };
1839a001fc1SVitaly Wool 
1844a3ac931SVitaly Wool /*
185dcf5aedbSVitaly Wool  * handle flags, go under HANDLE_FLAG_MASK
186dcf5aedbSVitaly Wool  */
187dcf5aedbSVitaly Wool enum z3fold_handle_flags {
188dcf5aedbSVitaly Wool 	HANDLES_NOFREE = 0,
189dcf5aedbSVitaly Wool };
190dcf5aedbSVitaly Wool 
191dcf5aedbSVitaly Wool /*
1924a3ac931SVitaly Wool  * Forward declarations
1934a3ac931SVitaly Wool  */
1944a3ac931SVitaly Wool static struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool);
1954a3ac931SVitaly Wool static void compact_page_work(struct work_struct *w);
1964a3ac931SVitaly Wool 
1979a001fc1SVitaly Wool /*****************
1989a001fc1SVitaly Wool  * Helpers
1999a001fc1SVitaly Wool *****************/
2009a001fc1SVitaly Wool 
2019a001fc1SVitaly Wool /* Converts an allocation size in bytes to size in z3fold chunks */
2029a001fc1SVitaly Wool static int size_to_chunks(size_t size)
2039a001fc1SVitaly Wool {
2049a001fc1SVitaly Wool 	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
2059a001fc1SVitaly Wool }
2069a001fc1SVitaly Wool 
2079a001fc1SVitaly Wool #define for_each_unbuddied_list(_iter, _begin) \
2089a001fc1SVitaly Wool 	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
2099a001fc1SVitaly Wool 
210bb9f6f63SVitaly Wool static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
211bb9f6f63SVitaly Wool 							gfp_t gfp)
2127c2b8baaSVitaly Wool {
213f1549cb5SHenry Burns 	struct z3fold_buddy_slots *slots;
214f1549cb5SHenry Burns 
215f94afee9SHui Su 	slots = kmem_cache_zalloc(pool->c_handle,
216f1549cb5SHenry Burns 				 (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE)));
2177c2b8baaSVitaly Wool 
2187c2b8baaSVitaly Wool 	if (slots) {
219af4798a5SQian Cai 		/* It will be freed separately in free_handle(). */
220af4798a5SQian Cai 		kmemleak_not_leak(slots);
2217c2b8baaSVitaly Wool 		slots->pool = (unsigned long)pool;
2224a3ac931SVitaly Wool 		rwlock_init(&slots->lock);
2237c2b8baaSVitaly Wool 	}
2247c2b8baaSVitaly Wool 
2257c2b8baaSVitaly Wool 	return slots;
2267c2b8baaSVitaly Wool }
2277c2b8baaSVitaly Wool 
2287c2b8baaSVitaly Wool static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
2297c2b8baaSVitaly Wool {
2307c2b8baaSVitaly Wool 	return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
2317c2b8baaSVitaly Wool }
2327c2b8baaSVitaly Wool 
2337c2b8baaSVitaly Wool static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
2347c2b8baaSVitaly Wool {
2357c2b8baaSVitaly Wool 	return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
2367c2b8baaSVitaly Wool }
2377c2b8baaSVitaly Wool 
/* Lock a z3fold page's per-page spinlock */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}
2434a3ac931SVitaly Wool 
/* Try to lock a z3fold page; returns nonzero on success, 0 if contended */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}
2494a3ac931SVitaly Wool 
/* Unlock a z3fold page's per-page spinlock */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}
2554a3ac931SVitaly Wool 
/* return locked z3fold page if it's not headless */
static inline struct z3fold_header *get_z3fold_header(unsigned long handle)
{
	struct z3fold_buddy_slots *slots;
	struct z3fold_header *zhdr;
	int locked = 0;

	if (!(handle & (1 << PAGE_HEADLESS))) {
		slots = handle_to_slots(handle);
		do {
			unsigned long addr;

			/*
			 * Re-read the page address under the slots lock each
			 * iteration, and only trylock the page so we never
			 * spin on the page lock while holding slots->lock.
			 */
			read_lock(&slots->lock);
			addr = *(unsigned long *)handle;
			zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
			locked = z3fold_page_trylock(zhdr);
			read_unlock(&slots->lock);
			if (locked)
				break;
			cpu_relax();
		} while (true);
	} else {
		/* headless handle encodes the page address directly */
		zhdr = (struct z3fold_header *)(handle & PAGE_MASK);
	}

	return zhdr;
}
2834a3ac931SVitaly Wool 
2844a3ac931SVitaly Wool static inline void put_z3fold_header(struct z3fold_header *zhdr)
2854a3ac931SVitaly Wool {
2864a3ac931SVitaly Wool 	struct page *page = virt_to_page(zhdr);
2874a3ac931SVitaly Wool 
2884a3ac931SVitaly Wool 	if (!test_bit(PAGE_HEADLESS, &page->private))
2894a3ac931SVitaly Wool 		z3fold_page_unlock(zhdr);
2904a3ac931SVitaly Wool }
2914a3ac931SVitaly Wool 
/*
 * Clear a handle's slot and release the slots structure once its last
 * handle is gone. @zhdr is the page the object lived in.
 */
static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr)
{
	struct z3fold_buddy_slots *slots;
	int i;
	bool is_free;

	/* headless pages encode the handle inline; there are no slots */
	if (handle & (1 << PAGE_HEADLESS))
		return;

	if (WARN_ON(*(unsigned long *)handle == 0))
		return;

	slots = handle_to_slots(handle);
	write_lock(&slots->lock);
	*(unsigned long *)handle = 0;

	if (test_bit(HANDLES_NOFREE, &slots->pool)) {
		write_unlock(&slots->lock);
		return; /* simple case, nothing else to do */
	}

	/* a foreign handle lives in another page's slots structure */
	if (zhdr->slots != slots)
		zhdr->foreign_handles--;

	/* check whether any slot in this structure is still in use */
	is_free = true;
	for (i = 0; i <= BUDDY_MASK; i++) {
		if (slots->slot[i]) {
			is_free = false;
			break;
		}
	}
	write_unlock(&slots->lock);

	/* last handle gone: return the slots structure to the cache */
	if (is_free) {
		struct z3fold_pool *pool = slots_to_pool(slots);

		if (zhdr->slots == slots)
			zhdr->slots = NULL;
		kmem_cache_free(pool->c_handle, slots);
	}
}
3337c2b8baaSVitaly Wool 
334ea8157abSDavid Howells static int z3fold_init_fs_context(struct fs_context *fc)
3351f862989SVitaly Wool {
336ea8157abSDavid Howells 	return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
3371f862989SVitaly Wool }
3381f862989SVitaly Wool 
/* Pseudo filesystem backing the anonymous inodes used for page migration */
static struct file_system_type z3fold_fs = {
	.name		= "z3fold",
	.init_fs_context = z3fold_init_fs_context,
	.kill_sb	= kill_anon_super,
};
3441f862989SVitaly Wool 
3451f862989SVitaly Wool static struct vfsmount *z3fold_mnt;
3461f862989SVitaly Wool static int z3fold_mount(void)
3471f862989SVitaly Wool {
3481f862989SVitaly Wool 	int ret = 0;
3491f862989SVitaly Wool 
3501f862989SVitaly Wool 	z3fold_mnt = kern_mount(&z3fold_fs);
3511f862989SVitaly Wool 	if (IS_ERR(z3fold_mnt))
3521f862989SVitaly Wool 		ret = PTR_ERR(z3fold_mnt);
3531f862989SVitaly Wool 
3541f862989SVitaly Wool 	return ret;
3551f862989SVitaly Wool }
3561f862989SVitaly Wool 
/* Tear down the mount created by z3fold_mount() */
static void z3fold_unmount(void)
{
	kern_unmount(z3fold_mnt);
}
3611f862989SVitaly Wool 
3621f862989SVitaly Wool static const struct address_space_operations z3fold_aops;
3631f862989SVitaly Wool static int z3fold_register_migration(struct z3fold_pool *pool)
3641f862989SVitaly Wool {
3651f862989SVitaly Wool 	pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb);
3661f862989SVitaly Wool 	if (IS_ERR(pool->inode)) {
3671f862989SVitaly Wool 		pool->inode = NULL;
3681f862989SVitaly Wool 		return 1;
3691f862989SVitaly Wool 	}
3701f862989SVitaly Wool 
3711f862989SVitaly Wool 	pool->inode->i_mapping->private_data = pool;
3721f862989SVitaly Wool 	pool->inode->i_mapping->a_ops = &z3fold_aops;
3731f862989SVitaly Wool 	return 0;
3741f862989SVitaly Wool }
3751f862989SVitaly Wool 
3761f862989SVitaly Wool static void z3fold_unregister_migration(struct z3fold_pool *pool)
3771f862989SVitaly Wool {
3781f862989SVitaly Wool 	if (pool->inode)
3791f862989SVitaly Wool 		iput(pool->inode);
3801f862989SVitaly Wool }
3811f862989SVitaly Wool 
/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
					struct z3fold_pool *pool, gfp_t gfp)
{
	struct z3fold_header *zhdr = page_address(page);
	struct z3fold_buddy_slots *slots;

	/* start from a clean page state regardless of headless-ness */
	INIT_LIST_HEAD(&page->lru);
	clear_bit(PAGE_HEADLESS, &page->private);
	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	clear_bit(PAGE_STALE, &page->private);
	clear_bit(PAGE_CLAIMED, &page->private);
	/* a headless page stores one object and needs no header setup */
	if (headless)
		return zhdr;

	slots = alloc_slots(pool, gfp);
	if (!slots)
		return NULL;

	memset(zhdr, 0, sizeof(*zhdr));
	spin_lock_init(&zhdr->page_lock);
	kref_init(&zhdr->refcount);
	zhdr->cpu = -1;	/* not yet bound to any CPU's unbuddied lists */
	zhdr->slots = slots;
	zhdr->pool = pool;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_WORK(&zhdr->work, compact_page_work);
	return zhdr;
}
4129a001fc1SVitaly Wool 
/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		/* clear movable state under the page lock before freeing */
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	ClearPagePrivate(page);
	__free_page(page);
}
4245a27aa82SVitaly Wool 
4257c2b8baaSVitaly Wool /* Helper function to build the index */
4267c2b8baaSVitaly Wool static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
4277c2b8baaSVitaly Wool {
4287c2b8baaSVitaly Wool 	return (bud + zhdr->first_num) & BUDDY_MASK;
4297c2b8baaSVitaly Wool }
4307c2b8baaSVitaly Wool 
/*
 * Encodes the handle of a particular buddy within a z3fold page
 * Pool lock should be held as this function accesses first_num
 */
static unsigned long __encode_handle(struct z3fold_header *zhdr,
				struct z3fold_buddy_slots *slots,
				enum buddy bud)
{
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	/* for the LAST buddy, stash its size in the low bits as well */
	if (bud == LAST)
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	/* publish the encoded value under the slots write lock */
	write_lock(&slots->lock);
	slots->slot[idx] = h;
	write_unlock(&slots->lock);
	return (unsigned long)&slots->slot[idx];
}
4609a001fc1SVitaly Wool 
/* Encode a handle for @bud using the page's own slots structure */
static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	return __encode_handle(zhdr, zhdr->slots, bud);
}
4653f9d2b57SVitaly Wool 
/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	/* the chunk count was packed below PAGE_MASK by __encode_handle() */
	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}
477ca0246bbSVitaly Wool 
/*
 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
 *  but that doesn't matter. because the masking will result in the
 *  correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	/* headless handles carry no buddy information */
	read_lock(&slots->lock);
	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	/* the low bits of addr are the slot index set by __encode_handle() */
	return (addr - zhdr->first_num) & BUDDY_MASK;
}
4969a001fc1SVitaly Wool 
/* Return the pool a z3fold page belongs to */
static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}
5019050cce1SVitaly Wool 
/*
 * Detach a dead z3fold page from the LRU and queue it on the stale list
 * for deferred freeing by free_pages_work().
 */
static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	spin_lock(&pool->lock);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);

	/* drop the page lock before taking stale_lock */
	if (locked)
		z3fold_page_unlock(zhdr);

	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);
}
523d30561c5SVitaly Wool 
/* kref release callback for a page whose lock the caller does not hold */
static void release_z3fold_page(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	__release_z3fold_page(zhdr, false);
}
530d30561c5SVitaly Wool 
/* kref release callback for a page whose lock the caller already holds */
static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	/* trylock must fail here, proving the caller holds the page lock */
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}
538d30561c5SVitaly Wool 
/*
 * Like release_z3fold_page_locked(), but additionally removes the page
 * from its buddy list under the pool lock first.
 */
static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
					       refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	/* trylock must fail here, proving the caller holds the page lock */
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}
552d30561c5SVitaly Wool 
/* Work function that frees all pages queued on the pool's stale list */
static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		/* drop the lock while cancelling work and freeing the page */
		spin_unlock(&pool->stale_lock);
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}
574d30561c5SVitaly Wool 
5759a001fc1SVitaly Wool /*
5769a001fc1SVitaly Wool  * Returns the number of free chunks in a z3fold page.
5779a001fc1SVitaly Wool  * NB: can't be used with HEADLESS pages.
5789a001fc1SVitaly Wool  */
5799a001fc1SVitaly Wool static int num_free_chunks(struct z3fold_header *zhdr)
5809a001fc1SVitaly Wool {
5819a001fc1SVitaly Wool 	int nfree;
5829a001fc1SVitaly Wool 	/*
5839a001fc1SVitaly Wool 	 * If there is a middle object, pick up the bigger free space
5849a001fc1SVitaly Wool 	 * either before or after it. Otherwise just subtract the number
5859a001fc1SVitaly Wool 	 * of chunks occupied by the first and the last objects.
5869a001fc1SVitaly Wool 	 */
5879a001fc1SVitaly Wool 	if (zhdr->middle_chunks != 0) {
5889a001fc1SVitaly Wool 		int nfree_before = zhdr->first_chunks ?
589ede93213SVitaly Wool 			0 : zhdr->start_middle - ZHDR_CHUNKS;
5909a001fc1SVitaly Wool 		int nfree_after = zhdr->last_chunks ?
591ede93213SVitaly Wool 			0 : TOTAL_CHUNKS -
592ede93213SVitaly Wool 				(zhdr->start_middle + zhdr->middle_chunks);
5939a001fc1SVitaly Wool 		nfree = max(nfree_before, nfree_after);
5949a001fc1SVitaly Wool 	} else
5959a001fc1SVitaly Wool 		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
5969a001fc1SVitaly Wool 	return nfree;
5979a001fc1SVitaly Wool }
5989a001fc1SVitaly Wool 
/* Add to the appropriate unbuddied list */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	/* only pages with at least one free buddy are "unbuddied" */
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		struct list_head *unbuddied;
		int freechunks = num_free_chunks(zhdr);

		/* stay on this CPU so zhdr->cpu matches the list we used */
		migrate_disable();
		unbuddied = this_cpu_ptr(pool->unbuddied);
		spin_lock(&pool->lock);
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		zhdr->cpu = smp_processor_id();
		migrate_enable();
	}
}
6179050cce1SVitaly Wool 
618dcf5aedbSVitaly Wool static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks)
619dcf5aedbSVitaly Wool {
620dcf5aedbSVitaly Wool 	enum buddy bud = HEADLESS;
621dcf5aedbSVitaly Wool 
622dcf5aedbSVitaly Wool 	if (zhdr->middle_chunks) {
623dcf5aedbSVitaly Wool 		if (!zhdr->first_chunks &&
624dcf5aedbSVitaly Wool 		    chunks <= zhdr->start_middle - ZHDR_CHUNKS)
625dcf5aedbSVitaly Wool 			bud = FIRST;
626dcf5aedbSVitaly Wool 		else if (!zhdr->last_chunks)
627dcf5aedbSVitaly Wool 			bud = LAST;
628dcf5aedbSVitaly Wool 	} else {
629dcf5aedbSVitaly Wool 		if (!zhdr->first_chunks)
630dcf5aedbSVitaly Wool 			bud = FIRST;
631dcf5aedbSVitaly Wool 		else if (!zhdr->last_chunks)
632dcf5aedbSVitaly Wool 			bud = LAST;
633dcf5aedbSVitaly Wool 		else
634dcf5aedbSVitaly Wool 			bud = MIDDLE;
635dcf5aedbSVitaly Wool 	}
636dcf5aedbSVitaly Wool 
637dcf5aedbSVitaly Wool 	return bud;
638dcf5aedbSVitaly Wool }
639dcf5aedbSVitaly Wool 
640ede93213SVitaly Wool static inline void *mchunk_memmove(struct z3fold_header *zhdr,
641ede93213SVitaly Wool 				unsigned short dst_chunk)
642ede93213SVitaly Wool {
643ede93213SVitaly Wool 	void *beg = zhdr;
644ede93213SVitaly Wool 	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
645ede93213SVitaly Wool 		       beg + (zhdr->start_middle << CHUNK_SHIFT),
646ede93213SVitaly Wool 		       zhdr->middle_chunks << CHUNK_SHIFT);
647ede93213SVitaly Wool }
648ede93213SVitaly Wool 
6494a3ac931SVitaly Wool static inline bool buddy_single(struct z3fold_header *zhdr)
6504a3ac931SVitaly Wool {
6514a3ac931SVitaly Wool 	return !((zhdr->first_chunks && zhdr->middle_chunks) ||
6524a3ac931SVitaly Wool 			(zhdr->first_chunks && zhdr->last_chunks) ||
6534a3ac931SVitaly Wool 			(zhdr->middle_chunks && zhdr->last_chunks));
6544a3ac931SVitaly Wool }
6554a3ac931SVitaly Wool 
6564a3ac931SVitaly Wool static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr)
6574a3ac931SVitaly Wool {
6584a3ac931SVitaly Wool 	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
6594a3ac931SVitaly Wool 	void *p = zhdr;
6604a3ac931SVitaly Wool 	unsigned long old_handle = 0;
6614a3ac931SVitaly Wool 	size_t sz = 0;
6624a3ac931SVitaly Wool 	struct z3fold_header *new_zhdr = NULL;
6634a3ac931SVitaly Wool 	int first_idx = __idx(zhdr, FIRST);
6644a3ac931SVitaly Wool 	int middle_idx = __idx(zhdr, MIDDLE);
6654a3ac931SVitaly Wool 	int last_idx = __idx(zhdr, LAST);
6664a3ac931SVitaly Wool 	unsigned short *moved_chunks = NULL;
6674a3ac931SVitaly Wool 
6684a3ac931SVitaly Wool 	/*
6694a3ac931SVitaly Wool 	 * No need to protect slots here -- all the slots are "local" and
6704a3ac931SVitaly Wool 	 * the page lock is already taken
6714a3ac931SVitaly Wool 	 */
6724a3ac931SVitaly Wool 	if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) {
6734a3ac931SVitaly Wool 		p += ZHDR_SIZE_ALIGNED;
6744a3ac931SVitaly Wool 		sz = zhdr->first_chunks << CHUNK_SHIFT;
6754a3ac931SVitaly Wool 		old_handle = (unsigned long)&zhdr->slots->slot[first_idx];
6764a3ac931SVitaly Wool 		moved_chunks = &zhdr->first_chunks;
6774a3ac931SVitaly Wool 	} else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) {
6784a3ac931SVitaly Wool 		p += zhdr->start_middle << CHUNK_SHIFT;
6794a3ac931SVitaly Wool 		sz = zhdr->middle_chunks << CHUNK_SHIFT;
6804a3ac931SVitaly Wool 		old_handle = (unsigned long)&zhdr->slots->slot[middle_idx];
6814a3ac931SVitaly Wool 		moved_chunks = &zhdr->middle_chunks;
6824a3ac931SVitaly Wool 	} else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) {
6834a3ac931SVitaly Wool 		p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
6844a3ac931SVitaly Wool 		sz = zhdr->last_chunks << CHUNK_SHIFT;
6854a3ac931SVitaly Wool 		old_handle = (unsigned long)&zhdr->slots->slot[last_idx];
6864a3ac931SVitaly Wool 		moved_chunks = &zhdr->last_chunks;
6874a3ac931SVitaly Wool 	}
6884a3ac931SVitaly Wool 
6894a3ac931SVitaly Wool 	if (sz > 0) {
6904a3ac931SVitaly Wool 		enum buddy new_bud = HEADLESS;
6914a3ac931SVitaly Wool 		short chunks = size_to_chunks(sz);
6924a3ac931SVitaly Wool 		void *q;
6934a3ac931SVitaly Wool 
6944a3ac931SVitaly Wool 		new_zhdr = __z3fold_alloc(pool, sz, false);
6954a3ac931SVitaly Wool 		if (!new_zhdr)
6964a3ac931SVitaly Wool 			return NULL;
6974a3ac931SVitaly Wool 
6984a3ac931SVitaly Wool 		if (WARN_ON(new_zhdr == zhdr))
6994a3ac931SVitaly Wool 			goto out_fail;
7004a3ac931SVitaly Wool 
701dcf5aedbSVitaly Wool 		new_bud = get_free_buddy(new_zhdr, chunks);
7024a3ac931SVitaly Wool 		q = new_zhdr;
7034a3ac931SVitaly Wool 		switch (new_bud) {
7044a3ac931SVitaly Wool 		case FIRST:
7054a3ac931SVitaly Wool 			new_zhdr->first_chunks = chunks;
7064a3ac931SVitaly Wool 			q += ZHDR_SIZE_ALIGNED;
7074a3ac931SVitaly Wool 			break;
7084a3ac931SVitaly Wool 		case MIDDLE:
7094a3ac931SVitaly Wool 			new_zhdr->middle_chunks = chunks;
7104a3ac931SVitaly Wool 			new_zhdr->start_middle =
7114a3ac931SVitaly Wool 				new_zhdr->first_chunks + ZHDR_CHUNKS;
7124a3ac931SVitaly Wool 			q += new_zhdr->start_middle << CHUNK_SHIFT;
7134a3ac931SVitaly Wool 			break;
7144a3ac931SVitaly Wool 		case LAST:
7154a3ac931SVitaly Wool 			new_zhdr->last_chunks = chunks;
7164a3ac931SVitaly Wool 			q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT);
7174a3ac931SVitaly Wool 			break;
7184a3ac931SVitaly Wool 		default:
7194a3ac931SVitaly Wool 			goto out_fail;
7204a3ac931SVitaly Wool 		}
7214a3ac931SVitaly Wool 		new_zhdr->foreign_handles++;
7224a3ac931SVitaly Wool 		memcpy(q, p, sz);
7234a3ac931SVitaly Wool 		write_lock(&zhdr->slots->lock);
7244a3ac931SVitaly Wool 		*(unsigned long *)old_handle = (unsigned long)new_zhdr +
7254a3ac931SVitaly Wool 			__idx(new_zhdr, new_bud);
7264a3ac931SVitaly Wool 		if (new_bud == LAST)
7274a3ac931SVitaly Wool 			*(unsigned long *)old_handle |=
7284a3ac931SVitaly Wool 					(new_zhdr->last_chunks << BUDDY_SHIFT);
7294a3ac931SVitaly Wool 		write_unlock(&zhdr->slots->lock);
7304a3ac931SVitaly Wool 		add_to_unbuddied(pool, new_zhdr);
7314a3ac931SVitaly Wool 		z3fold_page_unlock(new_zhdr);
7324a3ac931SVitaly Wool 
7334a3ac931SVitaly Wool 		*moved_chunks = 0;
7344a3ac931SVitaly Wool 	}
7354a3ac931SVitaly Wool 
7364a3ac931SVitaly Wool 	return new_zhdr;
7374a3ac931SVitaly Wool 
7384a3ac931SVitaly Wool out_fail:
7394a3ac931SVitaly Wool 	if (new_zhdr) {
7404a3ac931SVitaly Wool 		if (kref_put(&new_zhdr->refcount, release_z3fold_page_locked))
7414a3ac931SVitaly Wool 			atomic64_dec(&pool->pages_nr);
7424a3ac931SVitaly Wool 		else {
7434a3ac931SVitaly Wool 			add_to_unbuddied(pool, new_zhdr);
7444a3ac931SVitaly Wool 			z3fold_page_unlock(new_zhdr);
7454a3ac931SVitaly Wool 		}
7464a3ac931SVitaly Wool 	}
7474a3ac931SVitaly Wool 	return NULL;
7484a3ac931SVitaly Wool 
7494a3ac931SVitaly Wool }
7504a3ac931SVitaly Wool 
7511b096e5aSVitaly Wool #define BIG_CHUNK_GAP	3
7529a001fc1SVitaly Wool /* Has to be called with lock held */
7539a001fc1SVitaly Wool static int z3fold_compact_page(struct z3fold_header *zhdr)
7549a001fc1SVitaly Wool {
7559a001fc1SVitaly Wool 	struct page *page = virt_to_page(zhdr);
7569a001fc1SVitaly Wool 
757ede93213SVitaly Wool 	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
758ede93213SVitaly Wool 		return 0; /* can't move middle chunk, it's used */
7599a001fc1SVitaly Wool 
7601f862989SVitaly Wool 	if (unlikely(PageIsolated(page)))
7611f862989SVitaly Wool 		return 0;
7621f862989SVitaly Wool 
763ede93213SVitaly Wool 	if (zhdr->middle_chunks == 0)
764ede93213SVitaly Wool 		return 0; /* nothing to compact */
765ede93213SVitaly Wool 
766ede93213SVitaly Wool 	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
767ede93213SVitaly Wool 		/* move to the beginning */
768ede93213SVitaly Wool 		mchunk_memmove(zhdr, ZHDR_CHUNKS);
7699a001fc1SVitaly Wool 		zhdr->first_chunks = zhdr->middle_chunks;
7709a001fc1SVitaly Wool 		zhdr->middle_chunks = 0;
7719a001fc1SVitaly Wool 		zhdr->start_middle = 0;
7729a001fc1SVitaly Wool 		zhdr->first_num++;
773ede93213SVitaly Wool 		return 1;
7749a001fc1SVitaly Wool 	}
7759a001fc1SVitaly Wool 
7761b096e5aSVitaly Wool 	/*
7771b096e5aSVitaly Wool 	 * moving data is expensive, so let's only do that if
7781b096e5aSVitaly Wool 	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
7791b096e5aSVitaly Wool 	 */
7801b096e5aSVitaly Wool 	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
7811b096e5aSVitaly Wool 	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
7821b096e5aSVitaly Wool 			BIG_CHUNK_GAP) {
7831b096e5aSVitaly Wool 		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
7841b096e5aSVitaly Wool 		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
7851b096e5aSVitaly Wool 		return 1;
7861b096e5aSVitaly Wool 	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
7871b096e5aSVitaly Wool 		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
7881b096e5aSVitaly Wool 					+ zhdr->middle_chunks) >=
7891b096e5aSVitaly Wool 			BIG_CHUNK_GAP) {
7901b096e5aSVitaly Wool 		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
7911b096e5aSVitaly Wool 			zhdr->middle_chunks;
7921b096e5aSVitaly Wool 		mchunk_memmove(zhdr, new_start);
7931b096e5aSVitaly Wool 		zhdr->start_middle = new_start;
7941b096e5aSVitaly Wool 		return 1;
7951b096e5aSVitaly Wool 	}
7961b096e5aSVitaly Wool 
7971b096e5aSVitaly Wool 	return 0;
7981b096e5aSVitaly Wool }
7991b096e5aSVitaly Wool 
800d30561c5SVitaly Wool static void do_compact_page(struct z3fold_header *zhdr, bool locked)
801d30561c5SVitaly Wool {
8029050cce1SVitaly Wool 	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
803d30561c5SVitaly Wool 	struct page *page;
804d30561c5SVitaly Wool 
805d30561c5SVitaly Wool 	page = virt_to_page(zhdr);
806d30561c5SVitaly Wool 	if (locked)
807d30561c5SVitaly Wool 		WARN_ON(z3fold_page_trylock(zhdr));
808d30561c5SVitaly Wool 	else
809d30561c5SVitaly Wool 		z3fold_page_lock(zhdr);
8105d03a661SVitaly Wool 	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
811d30561c5SVitaly Wool 		z3fold_page_unlock(zhdr);
812d30561c5SVitaly Wool 		return;
813d30561c5SVitaly Wool 	}
814d30561c5SVitaly Wool 	spin_lock(&pool->lock);
815d30561c5SVitaly Wool 	list_del_init(&zhdr->buddy);
816d30561c5SVitaly Wool 	spin_unlock(&pool->lock);
817d30561c5SVitaly Wool 
8185d03a661SVitaly Wool 	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
8195d03a661SVitaly Wool 		atomic64_dec(&pool->pages_nr);
8205d03a661SVitaly Wool 		return;
8215d03a661SVitaly Wool 	}
8225d03a661SVitaly Wool 
823dcf5aedbSVitaly Wool 	if (test_bit(PAGE_STALE, &page->private) ||
824dcf5aedbSVitaly Wool 	    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
8251f862989SVitaly Wool 		z3fold_page_unlock(zhdr);
8261f862989SVitaly Wool 		return;
8271f862989SVitaly Wool 	}
8281f862989SVitaly Wool 
8294a3ac931SVitaly Wool 	if (!zhdr->foreign_handles && buddy_single(zhdr) &&
8304a3ac931SVitaly Wool 	    zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) {
8314a3ac931SVitaly Wool 		if (kref_put(&zhdr->refcount, release_z3fold_page_locked))
8324a3ac931SVitaly Wool 			atomic64_dec(&pool->pages_nr);
833dcf5aedbSVitaly Wool 		else {
834dcf5aedbSVitaly Wool 			clear_bit(PAGE_CLAIMED, &page->private);
8354a3ac931SVitaly Wool 			z3fold_page_unlock(zhdr);
836dcf5aedbSVitaly Wool 		}
8374a3ac931SVitaly Wool 		return;
8384a3ac931SVitaly Wool 	}
8394a3ac931SVitaly Wool 
840d30561c5SVitaly Wool 	z3fold_compact_page(zhdr);
8419050cce1SVitaly Wool 	add_to_unbuddied(pool, zhdr);
842dcf5aedbSVitaly Wool 	clear_bit(PAGE_CLAIMED, &page->private);
843d30561c5SVitaly Wool 	z3fold_page_unlock(zhdr);
844d30561c5SVitaly Wool }
845d30561c5SVitaly Wool 
846d30561c5SVitaly Wool static void compact_page_work(struct work_struct *w)
847d30561c5SVitaly Wool {
848d30561c5SVitaly Wool 	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
849d30561c5SVitaly Wool 						work);
850d30561c5SVitaly Wool 
851d30561c5SVitaly Wool 	do_compact_page(zhdr, false);
852d30561c5SVitaly Wool }
853d30561c5SVitaly Wool 
8549050cce1SVitaly Wool /* returns _locked_ z3fold page header or NULL */
8559050cce1SVitaly Wool static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
8569050cce1SVitaly Wool 						size_t size, bool can_sleep)
8579050cce1SVitaly Wool {
8589050cce1SVitaly Wool 	struct z3fold_header *zhdr = NULL;
8599050cce1SVitaly Wool 	struct page *page;
8609050cce1SVitaly Wool 	struct list_head *unbuddied;
8619050cce1SVitaly Wool 	int chunks = size_to_chunks(size), i;
8629050cce1SVitaly Wool 
8639050cce1SVitaly Wool lookup:
864135f97fdSVitaly Wool 	migrate_disable();
8659050cce1SVitaly Wool 	/* First, try to find an unbuddied z3fold page. */
866135f97fdSVitaly Wool 	unbuddied = this_cpu_ptr(pool->unbuddied);
8679050cce1SVitaly Wool 	for_each_unbuddied_list(i, chunks) {
8689050cce1SVitaly Wool 		struct list_head *l = &unbuddied[i];
8699050cce1SVitaly Wool 
8709050cce1SVitaly Wool 		zhdr = list_first_entry_or_null(READ_ONCE(l),
8719050cce1SVitaly Wool 					struct z3fold_header, buddy);
8729050cce1SVitaly Wool 
8739050cce1SVitaly Wool 		if (!zhdr)
8749050cce1SVitaly Wool 			continue;
8759050cce1SVitaly Wool 
8769050cce1SVitaly Wool 		/* Re-check under lock. */
8779050cce1SVitaly Wool 		spin_lock(&pool->lock);
8789050cce1SVitaly Wool 		l = &unbuddied[i];
8799050cce1SVitaly Wool 		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
8809050cce1SVitaly Wool 						struct z3fold_header, buddy)) ||
8819050cce1SVitaly Wool 		    !z3fold_page_trylock(zhdr)) {
8829050cce1SVitaly Wool 			spin_unlock(&pool->lock);
8839050cce1SVitaly Wool 			zhdr = NULL;
884135f97fdSVitaly Wool 			migrate_enable();
8859050cce1SVitaly Wool 			if (can_sleep)
8869050cce1SVitaly Wool 				cond_resched();
8879050cce1SVitaly Wool 			goto lookup;
8889050cce1SVitaly Wool 		}
8899050cce1SVitaly Wool 		list_del_init(&zhdr->buddy);
8909050cce1SVitaly Wool 		zhdr->cpu = -1;
8919050cce1SVitaly Wool 		spin_unlock(&pool->lock);
8929050cce1SVitaly Wool 
8939050cce1SVitaly Wool 		page = virt_to_page(zhdr);
8944a3ac931SVitaly Wool 		if (test_bit(NEEDS_COMPACTING, &page->private) ||
8954a3ac931SVitaly Wool 		    test_bit(PAGE_CLAIMED, &page->private)) {
8969050cce1SVitaly Wool 			z3fold_page_unlock(zhdr);
8979050cce1SVitaly Wool 			zhdr = NULL;
898135f97fdSVitaly Wool 			migrate_enable();
8999050cce1SVitaly Wool 			if (can_sleep)
9009050cce1SVitaly Wool 				cond_resched();
9019050cce1SVitaly Wool 			goto lookup;
9029050cce1SVitaly Wool 		}
9039050cce1SVitaly Wool 
9049050cce1SVitaly Wool 		/*
9059050cce1SVitaly Wool 		 * this page could not be removed from its unbuddied
9069050cce1SVitaly Wool 		 * list while pool lock was held, and then we've taken
9079050cce1SVitaly Wool 		 * page lock so kref_put could not be called before
9089050cce1SVitaly Wool 		 * we got here, so it's safe to just call kref_get()
9099050cce1SVitaly Wool 		 */
9109050cce1SVitaly Wool 		kref_get(&zhdr->refcount);
9119050cce1SVitaly Wool 		break;
9129050cce1SVitaly Wool 	}
913135f97fdSVitaly Wool 	migrate_enable();
9149050cce1SVitaly Wool 
915351618b2SVitaly Wool 	if (!zhdr) {
916351618b2SVitaly Wool 		int cpu;
917351618b2SVitaly Wool 
918351618b2SVitaly Wool 		/* look for _exact_ match on other cpus' lists */
919351618b2SVitaly Wool 		for_each_online_cpu(cpu) {
920351618b2SVitaly Wool 			struct list_head *l;
921351618b2SVitaly Wool 
922351618b2SVitaly Wool 			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
923351618b2SVitaly Wool 			spin_lock(&pool->lock);
924351618b2SVitaly Wool 			l = &unbuddied[chunks];
925351618b2SVitaly Wool 
926351618b2SVitaly Wool 			zhdr = list_first_entry_or_null(READ_ONCE(l),
927351618b2SVitaly Wool 						struct z3fold_header, buddy);
928351618b2SVitaly Wool 
929351618b2SVitaly Wool 			if (!zhdr || !z3fold_page_trylock(zhdr)) {
930351618b2SVitaly Wool 				spin_unlock(&pool->lock);
931351618b2SVitaly Wool 				zhdr = NULL;
932351618b2SVitaly Wool 				continue;
933351618b2SVitaly Wool 			}
934351618b2SVitaly Wool 			list_del_init(&zhdr->buddy);
935351618b2SVitaly Wool 			zhdr->cpu = -1;
936351618b2SVitaly Wool 			spin_unlock(&pool->lock);
937351618b2SVitaly Wool 
938351618b2SVitaly Wool 			page = virt_to_page(zhdr);
9394a3ac931SVitaly Wool 			if (test_bit(NEEDS_COMPACTING, &page->private) ||
9404a3ac931SVitaly Wool 			    test_bit(PAGE_CLAIMED, &page->private)) {
941351618b2SVitaly Wool 				z3fold_page_unlock(zhdr);
942351618b2SVitaly Wool 				zhdr = NULL;
943351618b2SVitaly Wool 				if (can_sleep)
944351618b2SVitaly Wool 					cond_resched();
945351618b2SVitaly Wool 				continue;
946351618b2SVitaly Wool 			}
947351618b2SVitaly Wool 			kref_get(&zhdr->refcount);
948351618b2SVitaly Wool 			break;
949351618b2SVitaly Wool 		}
950351618b2SVitaly Wool 	}
951351618b2SVitaly Wool 
952fc548865SVitaly Wool 	if (zhdr && !zhdr->slots)
953fc548865SVitaly Wool 		zhdr->slots = alloc_slots(pool,
954fc548865SVitaly Wool 					can_sleep ? GFP_NOIO : GFP_ATOMIC);
9559050cce1SVitaly Wool 	return zhdr;
9569050cce1SVitaly Wool }
957d30561c5SVitaly Wool 
958d30561c5SVitaly Wool /*
959d30561c5SVitaly Wool  * API Functions
960d30561c5SVitaly Wool  */
961d30561c5SVitaly Wool 
962d30561c5SVitaly Wool /**
963d30561c5SVitaly Wool  * z3fold_create_pool() - create a new z3fold pool
964d30561c5SVitaly Wool  * @name:	pool name
965d30561c5SVitaly Wool  * @gfp:	gfp flags when allocating the z3fold pool structure
966d30561c5SVitaly Wool  * @ops:	user-defined operations for the z3fold pool
967d30561c5SVitaly Wool  *
968d30561c5SVitaly Wool  * Return: pointer to the new z3fold pool or NULL if the metadata allocation
969d30561c5SVitaly Wool  * failed.
970d30561c5SVitaly Wool  */
971d30561c5SVitaly Wool static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
972d30561c5SVitaly Wool 		const struct z3fold_ops *ops)
973d30561c5SVitaly Wool {
974d30561c5SVitaly Wool 	struct z3fold_pool *pool = NULL;
975d30561c5SVitaly Wool 	int i, cpu;
976d30561c5SVitaly Wool 
977d30561c5SVitaly Wool 	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
978d30561c5SVitaly Wool 	if (!pool)
979d30561c5SVitaly Wool 		goto out;
9807c2b8baaSVitaly Wool 	pool->c_handle = kmem_cache_create("z3fold_handle",
9817c2b8baaSVitaly Wool 				sizeof(struct z3fold_buddy_slots),
9827c2b8baaSVitaly Wool 				SLOTS_ALIGN, 0, NULL);
9837c2b8baaSVitaly Wool 	if (!pool->c_handle)
9847c2b8baaSVitaly Wool 		goto out_c;
985d30561c5SVitaly Wool 	spin_lock_init(&pool->lock);
986d30561c5SVitaly Wool 	spin_lock_init(&pool->stale_lock);
987e891f60eSMiaohe Lin 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head) * NCHUNKS,
988e891f60eSMiaohe Lin 					 __alignof__(struct list_head));
9891ec6995dSXidong Wang 	if (!pool->unbuddied)
9901ec6995dSXidong Wang 		goto out_pool;
991d30561c5SVitaly Wool 	for_each_possible_cpu(cpu) {
992d30561c5SVitaly Wool 		struct list_head *unbuddied =
993d30561c5SVitaly Wool 				per_cpu_ptr(pool->unbuddied, cpu);
994d30561c5SVitaly Wool 		for_each_unbuddied_list(i, 0)
995d30561c5SVitaly Wool 			INIT_LIST_HEAD(&unbuddied[i]);
996d30561c5SVitaly Wool 	}
997d30561c5SVitaly Wool 	INIT_LIST_HEAD(&pool->lru);
998d30561c5SVitaly Wool 	INIT_LIST_HEAD(&pool->stale);
999d30561c5SVitaly Wool 	atomic64_set(&pool->pages_nr, 0);
1000d30561c5SVitaly Wool 	pool->name = name;
1001d30561c5SVitaly Wool 	pool->compact_wq = create_singlethread_workqueue(pool->name);
1002d30561c5SVitaly Wool 	if (!pool->compact_wq)
10031ec6995dSXidong Wang 		goto out_unbuddied;
1004d30561c5SVitaly Wool 	pool->release_wq = create_singlethread_workqueue(pool->name);
1005d30561c5SVitaly Wool 	if (!pool->release_wq)
1006d30561c5SVitaly Wool 		goto out_wq;
10071f862989SVitaly Wool 	if (z3fold_register_migration(pool))
10081f862989SVitaly Wool 		goto out_rwq;
1009d30561c5SVitaly Wool 	INIT_WORK(&pool->work, free_pages_work);
1010d30561c5SVitaly Wool 	pool->ops = ops;
1011d30561c5SVitaly Wool 	return pool;
1012d30561c5SVitaly Wool 
10131f862989SVitaly Wool out_rwq:
10141f862989SVitaly Wool 	destroy_workqueue(pool->release_wq);
1015d30561c5SVitaly Wool out_wq:
1016d30561c5SVitaly Wool 	destroy_workqueue(pool->compact_wq);
10171ec6995dSXidong Wang out_unbuddied:
10181ec6995dSXidong Wang 	free_percpu(pool->unbuddied);
10191ec6995dSXidong Wang out_pool:
10207c2b8baaSVitaly Wool 	kmem_cache_destroy(pool->c_handle);
10217c2b8baaSVitaly Wool out_c:
1022d30561c5SVitaly Wool 	kfree(pool);
10231ec6995dSXidong Wang out:
1024d30561c5SVitaly Wool 	return NULL;
1025d30561c5SVitaly Wool }
1026d30561c5SVitaly Wool 
1027d30561c5SVitaly Wool /**
1028d30561c5SVitaly Wool  * z3fold_destroy_pool() - destroys an existing z3fold pool
1029d30561c5SVitaly Wool  * @pool:	the z3fold pool to be destroyed
1030d30561c5SVitaly Wool  *
1031d30561c5SVitaly Wool  * The pool should be emptied before this function is called.
1032d30561c5SVitaly Wool  */
1033d30561c5SVitaly Wool static void z3fold_destroy_pool(struct z3fold_pool *pool)
1034d30561c5SVitaly Wool {
10357c2b8baaSVitaly Wool 	kmem_cache_destroy(pool->c_handle);
10366051d3bdSHenry Burns 
10376051d3bdSHenry Burns 	/*
10386051d3bdSHenry Burns 	 * We need to destroy pool->compact_wq before pool->release_wq,
10396051d3bdSHenry Burns 	 * as any pending work on pool->compact_wq will call
10406051d3bdSHenry Burns 	 * queue_work(pool->release_wq, &pool->work).
1041b997052bSHenry Burns 	 *
1042b997052bSHenry Burns 	 * There are still outstanding pages until both workqueues are drained,
1043b997052bSHenry Burns 	 * so we cannot unregister migration until then.
10446051d3bdSHenry Burns 	 */
10456051d3bdSHenry Burns 
1046d30561c5SVitaly Wool 	destroy_workqueue(pool->compact_wq);
10476051d3bdSHenry Burns 	destroy_workqueue(pool->release_wq);
1048b997052bSHenry Burns 	z3fold_unregister_migration(pool);
1049d30561c5SVitaly Wool 	kfree(pool);
1050d30561c5SVitaly Wool }
1051d30561c5SVitaly Wool 
10529a001fc1SVitaly Wool /**
10539a001fc1SVitaly Wool  * z3fold_alloc() - allocates a region of a given size
10549a001fc1SVitaly Wool  * @pool:	z3fold pool from which to allocate
10559a001fc1SVitaly Wool  * @size:	size in bytes of the desired allocation
10569a001fc1SVitaly Wool  * @gfp:	gfp flags used if the pool needs to grow
10579a001fc1SVitaly Wool  * @handle:	handle of the new allocation
10589a001fc1SVitaly Wool  *
10599a001fc1SVitaly Wool  * This function will attempt to find a free region in the pool large enough to
10609a001fc1SVitaly Wool  * satisfy the allocation request.  A search of the unbuddied lists is
10619a001fc1SVitaly Wool  * performed first. If no suitable free region is found, then a new page is
10629a001fc1SVitaly Wool  * allocated and added to the pool to satisfy the request.
10639a001fc1SVitaly Wool  *
10649a001fc1SVitaly Wool  * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
10659a001fc1SVitaly Wool  * as z3fold pool pages.
10669a001fc1SVitaly Wool  *
10679a001fc1SVitaly Wool  * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
10689a001fc1SVitaly Wool  * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
10699a001fc1SVitaly Wool  * a new page.
10709a001fc1SVitaly Wool  */
10719a001fc1SVitaly Wool static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
10729a001fc1SVitaly Wool 			unsigned long *handle)
10739a001fc1SVitaly Wool {
10749050cce1SVitaly Wool 	int chunks = size_to_chunks(size);
10759a001fc1SVitaly Wool 	struct z3fold_header *zhdr = NULL;
1076d30561c5SVitaly Wool 	struct page *page = NULL;
10779a001fc1SVitaly Wool 	enum buddy bud;
10788a97ea54SMatthew Wilcox 	bool can_sleep = gfpflags_allow_blocking(gfp);
10799a001fc1SVitaly Wool 
1080f1549cb5SHenry Burns 	if (!size)
10819a001fc1SVitaly Wool 		return -EINVAL;
10829a001fc1SVitaly Wool 
10839a001fc1SVitaly Wool 	if (size > PAGE_SIZE)
10849a001fc1SVitaly Wool 		return -ENOSPC;
10859a001fc1SVitaly Wool 
10869a001fc1SVitaly Wool 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
10879a001fc1SVitaly Wool 		bud = HEADLESS;
10889a001fc1SVitaly Wool 	else {
10899050cce1SVitaly Wool retry:
10909050cce1SVitaly Wool 		zhdr = __z3fold_alloc(pool, size, can_sleep);
1091d30561c5SVitaly Wool 		if (zhdr) {
1092dcf5aedbSVitaly Wool 			bud = get_free_buddy(zhdr, chunks);
1093dcf5aedbSVitaly Wool 			if (bud == HEADLESS) {
10945a27aa82SVitaly Wool 				if (kref_put(&zhdr->refcount,
1095d30561c5SVitaly Wool 					     release_z3fold_page_locked))
10965a27aa82SVitaly Wool 					atomic64_dec(&pool->pages_nr);
1097d30561c5SVitaly Wool 				else
1098d30561c5SVitaly Wool 					z3fold_page_unlock(zhdr);
10999a001fc1SVitaly Wool 				pr_err("No free chunks in unbuddied\n");
11009a001fc1SVitaly Wool 				WARN_ON(1);
11019050cce1SVitaly Wool 				goto retry;
11029a001fc1SVitaly Wool 			}
11039050cce1SVitaly Wool 			page = virt_to_page(zhdr);
11049a001fc1SVitaly Wool 			goto found;
11059a001fc1SVitaly Wool 		}
11069a001fc1SVitaly Wool 		bud = FIRST;
11079a001fc1SVitaly Wool 	}
11089a001fc1SVitaly Wool 
11095c9bab59SVitaly Wool 	page = NULL;
11105c9bab59SVitaly Wool 	if (can_sleep) {
1111d30561c5SVitaly Wool 		spin_lock(&pool->stale_lock);
1112d30561c5SVitaly Wool 		zhdr = list_first_entry_or_null(&pool->stale,
1113d30561c5SVitaly Wool 						struct z3fold_header, buddy);
1114d30561c5SVitaly Wool 		/*
11155c9bab59SVitaly Wool 		 * Before allocating a page, let's see if we can take one from
11165c9bab59SVitaly Wool 		 * the stale pages list. cancel_work_sync() can sleep so we
11175c9bab59SVitaly Wool 		 * limit this case to the contexts where we can sleep
1118d30561c5SVitaly Wool 		 */
11195c9bab59SVitaly Wool 		if (zhdr) {
1120d30561c5SVitaly Wool 			list_del(&zhdr->buddy);
1121d30561c5SVitaly Wool 			spin_unlock(&pool->stale_lock);
1122d30561c5SVitaly Wool 			cancel_work_sync(&zhdr->work);
1123d30561c5SVitaly Wool 			page = virt_to_page(zhdr);
1124d30561c5SVitaly Wool 		} else {
1125d30561c5SVitaly Wool 			spin_unlock(&pool->stale_lock);
1126d30561c5SVitaly Wool 		}
11275c9bab59SVitaly Wool 	}
11285c9bab59SVitaly Wool 	if (!page)
11295c9bab59SVitaly Wool 		page = alloc_page(gfp);
1130d30561c5SVitaly Wool 
11319a001fc1SVitaly Wool 	if (!page)
11329a001fc1SVitaly Wool 		return -ENOMEM;
11332f1e5e4dSVitaly Wool 
113463398413SVitaly Wool 	zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
11359050cce1SVitaly Wool 	if (!zhdr) {
11369050cce1SVitaly Wool 		__free_page(page);
11379050cce1SVitaly Wool 		return -ENOMEM;
11389050cce1SVitaly Wool 	}
11399050cce1SVitaly Wool 	atomic64_inc(&pool->pages_nr);
11409a001fc1SVitaly Wool 
11419a001fc1SVitaly Wool 	if (bud == HEADLESS) {
11429a001fc1SVitaly Wool 		set_bit(PAGE_HEADLESS, &page->private);
11439a001fc1SVitaly Wool 		goto headless;
11449a001fc1SVitaly Wool 	}
1145810481a2SHenry Burns 	if (can_sleep) {
1146810481a2SHenry Burns 		lock_page(page);
11471f862989SVitaly Wool 		__SetPageMovable(page, pool->inode->i_mapping);
1148810481a2SHenry Burns 		unlock_page(page);
1149810481a2SHenry Burns 	} else {
1150810481a2SHenry Burns 		if (trylock_page(page)) {
1151810481a2SHenry Burns 			__SetPageMovable(page, pool->inode->i_mapping);
1152810481a2SHenry Burns 			unlock_page(page);
1153810481a2SHenry Burns 		}
1154810481a2SHenry Burns 	}
11552f1e5e4dSVitaly Wool 	z3fold_page_lock(zhdr);
11569a001fc1SVitaly Wool 
11579a001fc1SVitaly Wool found:
11589a001fc1SVitaly Wool 	if (bud == FIRST)
11599a001fc1SVitaly Wool 		zhdr->first_chunks = chunks;
11609a001fc1SVitaly Wool 	else if (bud == LAST)
11619a001fc1SVitaly Wool 		zhdr->last_chunks = chunks;
11629a001fc1SVitaly Wool 	else {
11639a001fc1SVitaly Wool 		zhdr->middle_chunks = chunks;
1164ede93213SVitaly Wool 		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
11659a001fc1SVitaly Wool 	}
11669050cce1SVitaly Wool 	add_to_unbuddied(pool, zhdr);
11679a001fc1SVitaly Wool 
11689a001fc1SVitaly Wool headless:
1169d30561c5SVitaly Wool 	spin_lock(&pool->lock);
11709a001fc1SVitaly Wool 	/* Add/move z3fold page to beginning of LRU */
11719a001fc1SVitaly Wool 	if (!list_empty(&page->lru))
11729a001fc1SVitaly Wool 		list_del(&page->lru);
11739a001fc1SVitaly Wool 
11749a001fc1SVitaly Wool 	list_add(&page->lru, &pool->lru);
11759a001fc1SVitaly Wool 
11769a001fc1SVitaly Wool 	*handle = encode_handle(zhdr, bud);
11779a001fc1SVitaly Wool 	spin_unlock(&pool->lock);
11782f1e5e4dSVitaly Wool 	if (bud != HEADLESS)
11792f1e5e4dSVitaly Wool 		z3fold_page_unlock(zhdr);
11809a001fc1SVitaly Wool 
11819a001fc1SVitaly Wool 	return 0;
11829a001fc1SVitaly Wool }
11839a001fc1SVitaly Wool 
11849a001fc1SVitaly Wool /**
11859a001fc1SVitaly Wool  * z3fold_free() - frees the allocation associated with the given handle
11869a001fc1SVitaly Wool  * @pool:	pool in which the allocation resided
11879a001fc1SVitaly Wool  * @handle:	handle associated with the allocation returned by z3fold_alloc()
11889a001fc1SVitaly Wool  *
11899a001fc1SVitaly Wool  * In the case that the z3fold page in which the allocation resides is under
11909a001fc1SVitaly Wool  * reclaim, as indicated by the PG_reclaim flag being set, this function
11919a001fc1SVitaly Wool  * only sets the first|last_chunks to 0.  The page is actually freed
11929a001fc1SVitaly Wool  * once both buddies are evicted (see z3fold_reclaim_page() below).
11939a001fc1SVitaly Wool  */
11949a001fc1SVitaly Wool static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
11959a001fc1SVitaly Wool {
11969a001fc1SVitaly Wool 	struct z3fold_header *zhdr;
11979a001fc1SVitaly Wool 	struct page *page;
11989a001fc1SVitaly Wool 	enum buddy bud;
11995b6807deSVitaly Wool 	bool page_claimed;
12009a001fc1SVitaly Wool 
12014a3ac931SVitaly Wool 	zhdr = get_z3fold_header(handle);
12029a001fc1SVitaly Wool 	page = virt_to_page(zhdr);
12035b6807deSVitaly Wool 	page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);
12049a001fc1SVitaly Wool 
12059a001fc1SVitaly Wool 	if (test_bit(PAGE_HEADLESS, &page->private)) {
1206ca0246bbSVitaly Wool 		/* if a headless page is under reclaim, just leave.
1207ca0246bbSVitaly Wool 		 * NB: we use test_and_set_bit for a reason: if the bit
1208ca0246bbSVitaly Wool 		 * has not been set before, we release this page
1209ca0246bbSVitaly Wool 		 * immediately so we don't care about its value any more.
1210ca0246bbSVitaly Wool 		 */
12115b6807deSVitaly Wool 		if (!page_claimed) {
1212ca0246bbSVitaly Wool 			spin_lock(&pool->lock);
1213ca0246bbSVitaly Wool 			list_del(&page->lru);
1214ca0246bbSVitaly Wool 			spin_unlock(&pool->lock);
12154a3ac931SVitaly Wool 			put_z3fold_header(zhdr);
12161f862989SVitaly Wool 			free_z3fold_page(page, true);
1217ca0246bbSVitaly Wool 			atomic64_dec(&pool->pages_nr);
1218ca0246bbSVitaly Wool 		}
1219ca0246bbSVitaly Wool 		return;
1220ca0246bbSVitaly Wool 	}
1221ca0246bbSVitaly Wool 
1222ca0246bbSVitaly Wool 	/* Non-headless case */
122343afc194SVitaly Wool 	bud = handle_to_buddy(handle);
12249a001fc1SVitaly Wool 
12259a001fc1SVitaly Wool 	switch (bud) {
12269a001fc1SVitaly Wool 	case FIRST:
12279a001fc1SVitaly Wool 		zhdr->first_chunks = 0;
12289a001fc1SVitaly Wool 		break;
12299a001fc1SVitaly Wool 	case MIDDLE:
12309a001fc1SVitaly Wool 		zhdr->middle_chunks = 0;
12319a001fc1SVitaly Wool 		break;
12329a001fc1SVitaly Wool 	case LAST:
12339a001fc1SVitaly Wool 		zhdr->last_chunks = 0;
12349a001fc1SVitaly Wool 		break;
12359a001fc1SVitaly Wool 	default:
12369a001fc1SVitaly Wool 		pr_err("%s: unknown bud %d\n", __func__, bud);
12379a001fc1SVitaly Wool 		WARN_ON(1);
12384a3ac931SVitaly Wool 		put_z3fold_header(zhdr);
12399a001fc1SVitaly Wool 		return;
12409a001fc1SVitaly Wool 	}
12419a001fc1SVitaly Wool 
12424a3ac931SVitaly Wool 	if (!page_claimed)
1243fc548865SVitaly Wool 		free_handle(handle, zhdr);
1244d30561c5SVitaly Wool 	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
1245d30561c5SVitaly Wool 		atomic64_dec(&pool->pages_nr);
1246d30561c5SVitaly Wool 		return;
1247d30561c5SVitaly Wool 	}
12485b6807deSVitaly Wool 	if (page_claimed) {
12495b6807deSVitaly Wool 		/* the page has not been claimed by us */
12506098d7e1SVitaly Wool 		z3fold_page_unlock(zhdr);
12516098d7e1SVitaly Wool 		return;
12526098d7e1SVitaly Wool 	}
1253dcf5aedbSVitaly Wool 	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
12544a3ac931SVitaly Wool 		put_z3fold_header(zhdr);
12555b6807deSVitaly Wool 		clear_bit(PAGE_CLAIMED, &page->private);
1256d30561c5SVitaly Wool 		return;
1257d30561c5SVitaly Wool 	}
1258d30561c5SVitaly Wool 	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
1259d30561c5SVitaly Wool 		spin_lock(&pool->lock);
1260d30561c5SVitaly Wool 		list_del_init(&zhdr->buddy);
1261d30561c5SVitaly Wool 		spin_unlock(&pool->lock);
1262d30561c5SVitaly Wool 		zhdr->cpu = -1;
12635d03a661SVitaly Wool 		kref_get(&zhdr->refcount);
12645b6807deSVitaly Wool 		clear_bit(PAGE_CLAIMED, &page->private);
12654a3ac931SVitaly Wool 		do_compact_page(zhdr, true);
1266d30561c5SVitaly Wool 		return;
1267d30561c5SVitaly Wool 	}
12685d03a661SVitaly Wool 	kref_get(&zhdr->refcount);
12695b6807deSVitaly Wool 	clear_bit(PAGE_CLAIMED, &page->private);
12704a3ac931SVitaly Wool 	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
12714a3ac931SVitaly Wool 	put_z3fold_header(zhdr);
12729a001fc1SVitaly Wool }
12739a001fc1SVitaly Wool 
12749a001fc1SVitaly Wool /**
12759a001fc1SVitaly Wool  * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
12769a001fc1SVitaly Wool  * @pool:	pool from which a page will attempt to be evicted
1277f144c390SMike Rapoport  * @retries:	number of pages on the LRU list for which eviction will
12789a001fc1SVitaly Wool  *		be attempted before failing
12799a001fc1SVitaly Wool  *
12809a001fc1SVitaly Wool  * z3fold reclaim is different from normal system reclaim in that it is done
12819a001fc1SVitaly Wool  * from the bottom, up. This is because only the bottom layer, z3fold, has
12829a001fc1SVitaly Wool  * information on how the allocations are organized within each z3fold page.
12839a001fc1SVitaly Wool  * This has the potential to create interesting locking situations between
12849a001fc1SVitaly Wool  * z3fold and the user, however.
12859a001fc1SVitaly Wool  *
12869a001fc1SVitaly Wool  * To avoid these, this is how z3fold_reclaim_page() should be called:
1287f144c390SMike Rapoport  *
12889a001fc1SVitaly Wool  * The user detects a page should be reclaimed and calls z3fold_reclaim_page().
12899a001fc1SVitaly Wool  * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and
12909a001fc1SVitaly Wool  * call the user-defined eviction handler with the pool and handle as
12919a001fc1SVitaly Wool  * arguments.
12929a001fc1SVitaly Wool  *
12939a001fc1SVitaly Wool  * If the handle can not be evicted, the eviction handler should return
12949a001fc1SVitaly Wool  * non-zero. z3fold_reclaim_page() will add the z3fold page back to the
12959a001fc1SVitaly Wool  * appropriate list and try the next z3fold page on the LRU up to
12969a001fc1SVitaly Wool  * a user defined number of retries.
12979a001fc1SVitaly Wool  *
12989a001fc1SVitaly Wool  * If the handle is successfully evicted, the eviction handler should
12999a001fc1SVitaly Wool  * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free()
13009a001fc1SVitaly Wool  * contains logic to delay freeing the page if the page is under reclaim,
 * as indicated by the PAGE_CLAIMED bit being set on the underlying page.
13029a001fc1SVitaly Wool  *
13039a001fc1SVitaly Wool  * If all buddies in the z3fold page are successfully evicted, then the
13049a001fc1SVitaly Wool  * z3fold page can be freed.
13059a001fc1SVitaly Wool  *
13069a001fc1SVitaly Wool  * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
13079a001fc1SVitaly Wool  * no pages to evict or an eviction handler is not registered, -EAGAIN if
13089a001fc1SVitaly Wool  * the retry limit was hit.
13099a001fc1SVitaly Wool  */
13109a001fc1SVitaly Wool static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
13119a001fc1SVitaly Wool {
13124a3ac931SVitaly Wool 	int i, ret = -1;
1313d30561c5SVitaly Wool 	struct z3fold_header *zhdr = NULL;
1314d30561c5SVitaly Wool 	struct page *page = NULL;
1315d30561c5SVitaly Wool 	struct list_head *pos;
13169a001fc1SVitaly Wool 	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
1317dcf5aedbSVitaly Wool 	struct z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN)));
1318dcf5aedbSVitaly Wool 
1319dcf5aedbSVitaly Wool 	rwlock_init(&slots.lock);
1320dcf5aedbSVitaly Wool 	slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE);
13219a001fc1SVitaly Wool 
13229a001fc1SVitaly Wool 	spin_lock(&pool->lock);
13232f1e5e4dSVitaly Wool 	if (!pool->ops || !pool->ops->evict || retries == 0) {
13249a001fc1SVitaly Wool 		spin_unlock(&pool->lock);
13259a001fc1SVitaly Wool 		return -EINVAL;
13269a001fc1SVitaly Wool 	}
13279a001fc1SVitaly Wool 	for (i = 0; i < retries; i++) {
13282f1e5e4dSVitaly Wool 		if (list_empty(&pool->lru)) {
13292f1e5e4dSVitaly Wool 			spin_unlock(&pool->lock);
13302f1e5e4dSVitaly Wool 			return -EINVAL;
13312f1e5e4dSVitaly Wool 		}
1332d30561c5SVitaly Wool 		list_for_each_prev(pos, &pool->lru) {
1333d30561c5SVitaly Wool 			page = list_entry(pos, struct page, lru);
1334ca0246bbSVitaly Wool 
13353f9d2b57SVitaly Wool 			zhdr = page_address(page);
13366d679578SThomas Hebb 			if (test_bit(PAGE_HEADLESS, &page->private)) {
13376d679578SThomas Hebb 				/*
13386d679578SThomas Hebb 				 * For non-headless pages, we wait to do this
13396d679578SThomas Hebb 				 * until we have the page lock to avoid racing
13406d679578SThomas Hebb 				 * with __z3fold_alloc(). Headless pages don't
13416d679578SThomas Hebb 				 * have a lock (and __z3fold_alloc() will never
13426d679578SThomas Hebb 				 * see them), but we still need to test and set
13436d679578SThomas Hebb 				 * PAGE_CLAIMED to avoid racing with
13446d679578SThomas Hebb 				 * z3fold_free(), so just do it now before
13456d679578SThomas Hebb 				 * leaving the loop.
13466d679578SThomas Hebb 				 */
13476d679578SThomas Hebb 				if (test_and_set_bit(PAGE_CLAIMED, &page->private))
13486d679578SThomas Hebb 					continue;
13496d679578SThomas Hebb 
1350ca0246bbSVitaly Wool 				break;
13516d679578SThomas Hebb 			}
1352ca0246bbSVitaly Wool 
1353dcf5aedbSVitaly Wool 			if (kref_get_unless_zero(&zhdr->refcount) == 0) {
1354dcf5aedbSVitaly Wool 				zhdr = NULL;
1355dcf5aedbSVitaly Wool 				break;
1356dcf5aedbSVitaly Wool 			}
1357ca0246bbSVitaly Wool 			if (!z3fold_page_trylock(zhdr)) {
1358dcf5aedbSVitaly Wool 				if (kref_put(&zhdr->refcount,
1359dcf5aedbSVitaly Wool 						release_z3fold_page))
1360dcf5aedbSVitaly Wool 					atomic64_dec(&pool->pages_nr);
1361ca0246bbSVitaly Wool 				zhdr = NULL;
1362d30561c5SVitaly Wool 				continue; /* can't evict at this point */
1363ca0246bbSVitaly Wool 			}
1364dcf5aedbSVitaly Wool 
1365dcf5aedbSVitaly Wool 			/* test_and_set_bit is of course atomic, but we still
1366dcf5aedbSVitaly Wool 			 * need to do it under page lock, otherwise checking
1367dcf5aedbSVitaly Wool 			 * that bit in __z3fold_alloc wouldn't make sense
1368dcf5aedbSVitaly Wool 			 */
1369dcf5aedbSVitaly Wool 			if (zhdr->foreign_handles ||
1370dcf5aedbSVitaly Wool 			    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
1371dcf5aedbSVitaly Wool 				if (kref_put(&zhdr->refcount,
1372dcf5aedbSVitaly Wool 						release_z3fold_page))
1373dcf5aedbSVitaly Wool 					atomic64_dec(&pool->pages_nr);
1374dcf5aedbSVitaly Wool 				else
13754a3ac931SVitaly Wool 					z3fold_page_unlock(zhdr);
13764a3ac931SVitaly Wool 				zhdr = NULL;
13774a3ac931SVitaly Wool 				continue; /* can't evict such page */
13784a3ac931SVitaly Wool 			}
1379d30561c5SVitaly Wool 			list_del_init(&zhdr->buddy);
1380d30561c5SVitaly Wool 			zhdr->cpu = -1;
13816098d7e1SVitaly Wool 			break;
1382d30561c5SVitaly Wool 		}
1383d30561c5SVitaly Wool 
1384ca0246bbSVitaly Wool 		if (!zhdr)
1385ca0246bbSVitaly Wool 			break;
1386ca0246bbSVitaly Wool 
1387d30561c5SVitaly Wool 		list_del_init(&page->lru);
13882f1e5e4dSVitaly Wool 		spin_unlock(&pool->lock);
1389d30561c5SVitaly Wool 
1390d30561c5SVitaly Wool 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
13919a001fc1SVitaly Wool 			/*
13923f9d2b57SVitaly Wool 			 * We need encode the handles before unlocking, and
13933f9d2b57SVitaly Wool 			 * use our local slots structure because z3fold_free
13943f9d2b57SVitaly Wool 			 * can zero out zhdr->slots and we can't do much
13953f9d2b57SVitaly Wool 			 * about that
13969a001fc1SVitaly Wool 			 */
13979a001fc1SVitaly Wool 			first_handle = 0;
13989a001fc1SVitaly Wool 			last_handle = 0;
13999a001fc1SVitaly Wool 			middle_handle = 0;
1400dcf5aedbSVitaly Wool 			memset(slots.slot, 0, sizeof(slots.slot));
14019a001fc1SVitaly Wool 			if (zhdr->first_chunks)
1402dcf5aedbSVitaly Wool 				first_handle = __encode_handle(zhdr, &slots,
1403dcf5aedbSVitaly Wool 								FIRST);
14049a001fc1SVitaly Wool 			if (zhdr->middle_chunks)
1405dcf5aedbSVitaly Wool 				middle_handle = __encode_handle(zhdr, &slots,
1406dcf5aedbSVitaly Wool 								MIDDLE);
14079a001fc1SVitaly Wool 			if (zhdr->last_chunks)
1408dcf5aedbSVitaly Wool 				last_handle = __encode_handle(zhdr, &slots,
1409dcf5aedbSVitaly Wool 								LAST);
1410d30561c5SVitaly Wool 			/*
1411d30561c5SVitaly Wool 			 * it's safe to unlock here because we hold a
1412d30561c5SVitaly Wool 			 * reference to this page
1413d30561c5SVitaly Wool 			 */
14142f1e5e4dSVitaly Wool 			z3fold_page_unlock(zhdr);
14159a001fc1SVitaly Wool 		} else {
14164a3ac931SVitaly Wool 			first_handle = encode_handle(zhdr, HEADLESS);
14179a001fc1SVitaly Wool 			last_handle = middle_handle = 0;
14182f1e5e4dSVitaly Wool 		}
14199a001fc1SVitaly Wool 		/* Issue the eviction callback(s) */
14209a001fc1SVitaly Wool 		if (middle_handle) {
14219a001fc1SVitaly Wool 			ret = pool->ops->evict(pool, middle_handle);
14229a001fc1SVitaly Wool 			if (ret)
14239a001fc1SVitaly Wool 				goto next;
14249a001fc1SVitaly Wool 		}
14259a001fc1SVitaly Wool 		if (first_handle) {
14269a001fc1SVitaly Wool 			ret = pool->ops->evict(pool, first_handle);
14279a001fc1SVitaly Wool 			if (ret)
14289a001fc1SVitaly Wool 				goto next;
14299a001fc1SVitaly Wool 		}
14309a001fc1SVitaly Wool 		if (last_handle) {
14319a001fc1SVitaly Wool 			ret = pool->ops->evict(pool, last_handle);
14329a001fc1SVitaly Wool 			if (ret)
14339a001fc1SVitaly Wool 				goto next;
14349a001fc1SVitaly Wool 		}
14359a001fc1SVitaly Wool next:
14365a27aa82SVitaly Wool 		if (test_bit(PAGE_HEADLESS, &page->private)) {
14375a27aa82SVitaly Wool 			if (ret == 0) {
14381f862989SVitaly Wool 				free_z3fold_page(page, true);
1439ca0246bbSVitaly Wool 				atomic64_dec(&pool->pages_nr);
14409a001fc1SVitaly Wool 				return 0;
14415a27aa82SVitaly Wool 			}
14426098d7e1SVitaly Wool 			spin_lock(&pool->lock);
14436098d7e1SVitaly Wool 			list_add(&page->lru, &pool->lru);
1444d5567c9dSVitaly Wool 			spin_unlock(&pool->lock);
14453f9d2b57SVitaly Wool 			clear_bit(PAGE_CLAIMED, &page->private);
14466098d7e1SVitaly Wool 		} else {
1447dcf5aedbSVitaly Wool 			struct z3fold_buddy_slots *slots = zhdr->slots;
14486098d7e1SVitaly Wool 			z3fold_page_lock(zhdr);
14496098d7e1SVitaly Wool 			if (kref_put(&zhdr->refcount,
14506098d7e1SVitaly Wool 					release_z3fold_page_locked)) {
1451dcf5aedbSVitaly Wool 				kmem_cache_free(pool->c_handle, slots);
14526098d7e1SVitaly Wool 				atomic64_dec(&pool->pages_nr);
14535a27aa82SVitaly Wool 				return 0;
14545a27aa82SVitaly Wool 			}
14555a27aa82SVitaly Wool 			/*
14566098d7e1SVitaly Wool 			 * if we are here, the page is still not completely
14576098d7e1SVitaly Wool 			 * free. Take the global pool lock then to be able
14586098d7e1SVitaly Wool 			 * to add it back to the lru list
14595a27aa82SVitaly Wool 			 */
14606098d7e1SVitaly Wool 			spin_lock(&pool->lock);
14619a001fc1SVitaly Wool 			list_add(&page->lru, &pool->lru);
14626098d7e1SVitaly Wool 			spin_unlock(&pool->lock);
14636098d7e1SVitaly Wool 			z3fold_page_unlock(zhdr);
14643f9d2b57SVitaly Wool 			clear_bit(PAGE_CLAIMED, &page->private);
14656098d7e1SVitaly Wool 		}
14666098d7e1SVitaly Wool 
14676098d7e1SVitaly Wool 		/* We started off locked to we need to lock the pool back */
14686098d7e1SVitaly Wool 		spin_lock(&pool->lock);
14699a001fc1SVitaly Wool 	}
14709a001fc1SVitaly Wool 	spin_unlock(&pool->lock);
14719a001fc1SVitaly Wool 	return -EAGAIN;
14729a001fc1SVitaly Wool }
14739a001fc1SVitaly Wool 
14749a001fc1SVitaly Wool /**
14759a001fc1SVitaly Wool  * z3fold_map() - maps the allocation associated with the given handle
14769a001fc1SVitaly Wool  * @pool:	pool in which the allocation resides
14779a001fc1SVitaly Wool  * @handle:	handle associated with the allocation to be mapped
14789a001fc1SVitaly Wool  *
14799a001fc1SVitaly Wool  * Extracts the buddy number from handle and constructs the pointer to the
14809a001fc1SVitaly Wool  * correct starting chunk within the page.
14819a001fc1SVitaly Wool  *
14829a001fc1SVitaly Wool  * Returns: a pointer to the mapped allocation
14839a001fc1SVitaly Wool  */
14849a001fc1SVitaly Wool static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
14859a001fc1SVitaly Wool {
14869a001fc1SVitaly Wool 	struct z3fold_header *zhdr;
14879a001fc1SVitaly Wool 	struct page *page;
14889a001fc1SVitaly Wool 	void *addr;
14899a001fc1SVitaly Wool 	enum buddy buddy;
14909a001fc1SVitaly Wool 
14914a3ac931SVitaly Wool 	zhdr = get_z3fold_header(handle);
14929a001fc1SVitaly Wool 	addr = zhdr;
14939a001fc1SVitaly Wool 	page = virt_to_page(zhdr);
14949a001fc1SVitaly Wool 
14959a001fc1SVitaly Wool 	if (test_bit(PAGE_HEADLESS, &page->private))
14969a001fc1SVitaly Wool 		goto out;
14979a001fc1SVitaly Wool 
14989a001fc1SVitaly Wool 	buddy = handle_to_buddy(handle);
14999a001fc1SVitaly Wool 	switch (buddy) {
15009a001fc1SVitaly Wool 	case FIRST:
15019a001fc1SVitaly Wool 		addr += ZHDR_SIZE_ALIGNED;
15029a001fc1SVitaly Wool 		break;
15039a001fc1SVitaly Wool 	case MIDDLE:
15049a001fc1SVitaly Wool 		addr += zhdr->start_middle << CHUNK_SHIFT;
15059a001fc1SVitaly Wool 		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
15069a001fc1SVitaly Wool 		break;
15079a001fc1SVitaly Wool 	case LAST:
1508ca0246bbSVitaly Wool 		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
15099a001fc1SVitaly Wool 		break;
15109a001fc1SVitaly Wool 	default:
15119a001fc1SVitaly Wool 		pr_err("unknown buddy id %d\n", buddy);
15129a001fc1SVitaly Wool 		WARN_ON(1);
15139a001fc1SVitaly Wool 		addr = NULL;
15149a001fc1SVitaly Wool 		break;
15159a001fc1SVitaly Wool 	}
15162f1e5e4dSVitaly Wool 
15171f862989SVitaly Wool 	if (addr)
15181f862989SVitaly Wool 		zhdr->mapped_count++;
15199a001fc1SVitaly Wool out:
15204a3ac931SVitaly Wool 	put_z3fold_header(zhdr);
15219a001fc1SVitaly Wool 	return addr;
15229a001fc1SVitaly Wool }
15239a001fc1SVitaly Wool 
15249a001fc1SVitaly Wool /**
15259a001fc1SVitaly Wool  * z3fold_unmap() - unmaps the allocation associated with the given handle
15269a001fc1SVitaly Wool  * @pool:	pool in which the allocation resides
15279a001fc1SVitaly Wool  * @handle:	handle associated with the allocation to be unmapped
15289a001fc1SVitaly Wool  */
15299a001fc1SVitaly Wool static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
15309a001fc1SVitaly Wool {
15319a001fc1SVitaly Wool 	struct z3fold_header *zhdr;
15329a001fc1SVitaly Wool 	struct page *page;
15339a001fc1SVitaly Wool 	enum buddy buddy;
15349a001fc1SVitaly Wool 
15354a3ac931SVitaly Wool 	zhdr = get_z3fold_header(handle);
15369a001fc1SVitaly Wool 	page = virt_to_page(zhdr);
15379a001fc1SVitaly Wool 
15382f1e5e4dSVitaly Wool 	if (test_bit(PAGE_HEADLESS, &page->private))
15399a001fc1SVitaly Wool 		return;
15409a001fc1SVitaly Wool 
15419a001fc1SVitaly Wool 	buddy = handle_to_buddy(handle);
15429a001fc1SVitaly Wool 	if (buddy == MIDDLE)
15439a001fc1SVitaly Wool 		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
15441f862989SVitaly Wool 	zhdr->mapped_count--;
15454a3ac931SVitaly Wool 	put_z3fold_header(zhdr);
15469a001fc1SVitaly Wool }
15479a001fc1SVitaly Wool 
15489a001fc1SVitaly Wool /**
15499a001fc1SVitaly Wool  * z3fold_get_pool_size() - gets the z3fold pool size in pages
15509a001fc1SVitaly Wool  * @pool:	pool whose size is being queried
15519a001fc1SVitaly Wool  *
155212d59ae6SVitaly Wool  * Returns: size in pages of the given pool.
15539a001fc1SVitaly Wool  */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	/* pages_nr is kept up to date by the alloc/free/reclaim paths */
	return atomic64_read(&pool->pages_nr);
}
15589a001fc1SVitaly Wool 
/*
 * Migration callback: attempt to claim @page and detach it from the pool's
 * lists so the core migration code may move it. Returns true only if the
 * page was successfully claimed; a reference on the header is taken and
 * dropped again by the migrate/putback callbacks.
 */
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	/* headless pages have no z3fold header or page lock */
	if (test_bit(PAGE_HEADLESS, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	/* pages queued for compaction or already stale cannot be isolated */
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	/* mapped objects or foreign handles pin the page in place */
	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
		goto out;

	/* PAGE_CLAIMED marks the page as ours; cleared on migrate/putback */
	if (test_and_set_bit(PAGE_CLAIMED, &page->private))
		goto out;
	pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);

	/* hold a reference on the header for the duration of the migration */
	kref_get(&zhdr->refcount);
	z3fold_page_unlock(zhdr);
	return true;

out:
	z3fold_page_unlock(zhdr);
	return false;
}
15971f862989SVitaly Wool 
15981f862989SVitaly Wool static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
15991f862989SVitaly Wool 			       struct page *page, enum migrate_mode mode)
16001f862989SVitaly Wool {
16011f862989SVitaly Wool 	struct z3fold_header *zhdr, *new_zhdr;
16021f862989SVitaly Wool 	struct z3fold_pool *pool;
16031f862989SVitaly Wool 	struct address_space *new_mapping;
16041f862989SVitaly Wool 
16051f862989SVitaly Wool 	VM_BUG_ON_PAGE(!PageMovable(page), page);
16061f862989SVitaly Wool 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
1607dcf5aedbSVitaly Wool 	VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
1608810481a2SHenry Burns 	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
16091f862989SVitaly Wool 
16101f862989SVitaly Wool 	zhdr = page_address(page);
16111f862989SVitaly Wool 	pool = zhdr_to_pool(zhdr);
16121f862989SVitaly Wool 
1613dcf5aedbSVitaly Wool 	if (!z3fold_page_trylock(zhdr))
16141f862989SVitaly Wool 		return -EAGAIN;
16154a3ac931SVitaly Wool 	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
16161f862989SVitaly Wool 		z3fold_page_unlock(zhdr);
1617dcf5aedbSVitaly Wool 		clear_bit(PAGE_CLAIMED, &page->private);
16181f862989SVitaly Wool 		return -EBUSY;
16191f862989SVitaly Wool 	}
1620c92d2f38SHenry Burns 	if (work_pending(&zhdr->work)) {
1621c92d2f38SHenry Burns 		z3fold_page_unlock(zhdr);
1622c92d2f38SHenry Burns 		return -EAGAIN;
1623c92d2f38SHenry Burns 	}
16241f862989SVitaly Wool 	new_zhdr = page_address(newpage);
16251f862989SVitaly Wool 	memcpy(new_zhdr, zhdr, PAGE_SIZE);
16261f862989SVitaly Wool 	newpage->private = page->private;
16271f862989SVitaly Wool 	page->private = 0;
16281f862989SVitaly Wool 	z3fold_page_unlock(zhdr);
16291f862989SVitaly Wool 	spin_lock_init(&new_zhdr->page_lock);
1630c92d2f38SHenry Burns 	INIT_WORK(&new_zhdr->work, compact_page_work);
1631c92d2f38SHenry Burns 	/*
1632c92d2f38SHenry Burns 	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
1633c92d2f38SHenry Burns 	 * so we only have to reinitialize it.
1634c92d2f38SHenry Burns 	 */
1635c92d2f38SHenry Burns 	INIT_LIST_HEAD(&new_zhdr->buddy);
16361f862989SVitaly Wool 	new_mapping = page_mapping(page);
16371f862989SVitaly Wool 	__ClearPageMovable(page);
16381f862989SVitaly Wool 	ClearPagePrivate(page);
16391f862989SVitaly Wool 
16401f862989SVitaly Wool 	get_page(newpage);
16411f862989SVitaly Wool 	z3fold_page_lock(new_zhdr);
16421f862989SVitaly Wool 	if (new_zhdr->first_chunks)
16431f862989SVitaly Wool 		encode_handle(new_zhdr, FIRST);
16441f862989SVitaly Wool 	if (new_zhdr->last_chunks)
16451f862989SVitaly Wool 		encode_handle(new_zhdr, LAST);
16461f862989SVitaly Wool 	if (new_zhdr->middle_chunks)
16471f862989SVitaly Wool 		encode_handle(new_zhdr, MIDDLE);
16481f862989SVitaly Wool 	set_bit(NEEDS_COMPACTING, &newpage->private);
16491f862989SVitaly Wool 	new_zhdr->cpu = smp_processor_id();
16501f862989SVitaly Wool 	spin_lock(&pool->lock);
16511f862989SVitaly Wool 	list_add(&newpage->lru, &pool->lru);
16521f862989SVitaly Wool 	spin_unlock(&pool->lock);
16531f862989SVitaly Wool 	__SetPageMovable(newpage, new_mapping);
16541f862989SVitaly Wool 	z3fold_page_unlock(new_zhdr);
16551f862989SVitaly Wool 
16561f862989SVitaly Wool 	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
16571f862989SVitaly Wool 
16581f862989SVitaly Wool 	page_mapcount_reset(page);
1659dcf5aedbSVitaly Wool 	clear_bit(PAGE_CLAIMED, &page->private);
16601f862989SVitaly Wool 	put_page(page);
16611f862989SVitaly Wool 	return 0;
16621f862989SVitaly Wool }
16631f862989SVitaly Wool 
/*
 * Migration callback: return an isolated page to normal pool operation
 * after a failed migration, dropping the reference that
 * z3fold_page_isolate() took.
 */
static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	/* drop the isolation reference; the page may now be fully freed */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	spin_unlock(&pool->lock);
	/* PAGE_CLAIMED is cleared under the page lock, then the lock dropped */
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}
16861f862989SVitaly Wool 
/*
 * Address-space operations wired up at mount time so the core page
 * migration machinery can isolate, move and put back z3fold pages.
 */
static const struct address_space_operations z3fold_aops = {
	.isolate_page = z3fold_page_isolate,
	.migratepage = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};
16921f862989SVitaly Wool 
16939a001fc1SVitaly Wool /*****************
16949a001fc1SVitaly Wool  * zpool
16959a001fc1SVitaly Wool  ****************/
16969a001fc1SVitaly Wool 
16979a001fc1SVitaly Wool static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
16989a001fc1SVitaly Wool {
16999a001fc1SVitaly Wool 	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
17009a001fc1SVitaly Wool 		return pool->zpool_ops->evict(pool->zpool, handle);
17019a001fc1SVitaly Wool 	else
17029a001fc1SVitaly Wool 		return -ENOENT;
17039a001fc1SVitaly Wool }
17049a001fc1SVitaly Wool 
/* z3fold-level ops used when this pool is driven through the zpool API */
static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};
17089a001fc1SVitaly Wool 
17099a001fc1SVitaly Wool static void *z3fold_zpool_create(const char *name, gfp_t gfp,
17109a001fc1SVitaly Wool 			       const struct zpool_ops *zpool_ops,
17119a001fc1SVitaly Wool 			       struct zpool *zpool)
17129a001fc1SVitaly Wool {
17139a001fc1SVitaly Wool 	struct z3fold_pool *pool;
17149a001fc1SVitaly Wool 
1715d30561c5SVitaly Wool 	pool = z3fold_create_pool(name, gfp,
1716d30561c5SVitaly Wool 				zpool_ops ? &z3fold_zpool_ops : NULL);
17179a001fc1SVitaly Wool 	if (pool) {
17189a001fc1SVitaly Wool 		pool->zpool = zpool;
17199a001fc1SVitaly Wool 		pool->zpool_ops = zpool_ops;
17209a001fc1SVitaly Wool 	}
17219a001fc1SVitaly Wool 	return pool;
17229a001fc1SVitaly Wool }
17239a001fc1SVitaly Wool 
/* zpool glue: tear down the underlying z3fold pool. */
static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}
17289a001fc1SVitaly Wool 
/* zpool glue: thin wrapper around z3fold_alloc(). */
static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
/* zpool glue: thin wrapper around z3fold_free(). */
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}
17389a001fc1SVitaly Wool 
17399a001fc1SVitaly Wool static int z3fold_zpool_shrink(void *pool, unsigned int pages,
17409a001fc1SVitaly Wool 			unsigned int *reclaimed)
17419a001fc1SVitaly Wool {
17429a001fc1SVitaly Wool 	unsigned int total = 0;
17439a001fc1SVitaly Wool 	int ret = -EINVAL;
17449a001fc1SVitaly Wool 
17459a001fc1SVitaly Wool 	while (total < pages) {
17469a001fc1SVitaly Wool 		ret = z3fold_reclaim_page(pool, 8);
17479a001fc1SVitaly Wool 		if (ret < 0)
17489a001fc1SVitaly Wool 			break;
17499a001fc1SVitaly Wool 		total++;
17509a001fc1SVitaly Wool 	}
17519a001fc1SVitaly Wool 
17529a001fc1SVitaly Wool 	if (reclaimed)
17539a001fc1SVitaly Wool 		*reclaimed = total;
17549a001fc1SVitaly Wool 
17559a001fc1SVitaly Wool 	return ret;
17569a001fc1SVitaly Wool }
17579a001fc1SVitaly Wool 
/* zpool glue: map a handle; the mapmode is unused by z3fold. */
static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
/* zpool glue: thin wrapper around z3fold_unmap(). */
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}
17679a001fc1SVitaly Wool 
/* zpool glue: report the pool's total backing size in bytes. */
static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}
17729a001fc1SVitaly Wool 
/* Registration record connecting z3fold to the generic zpool layer. */
static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	/* z3fold_map() may sleep (page lock), so advertise that to zswap */
	.sleep_mapped = true,
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.shrink =	z3fold_zpool_shrink,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_size =	z3fold_zpool_total_size,
};
17869a001fc1SVitaly Wool 
17879a001fc1SVitaly Wool MODULE_ALIAS("zpool-z3fold");
17889a001fc1SVitaly Wool 
17899a001fc1SVitaly Wool static int __init init_z3fold(void)
17909a001fc1SVitaly Wool {
17911f862989SVitaly Wool 	int ret;
17921f862989SVitaly Wool 
1793014284a0SMiaohe Lin 	/*
1794014284a0SMiaohe Lin 	 * Make sure the z3fold header is not larger than the page size and
1795014284a0SMiaohe Lin 	 * there has remaining spaces for its buddy.
1796014284a0SMiaohe Lin 	 */
1797014284a0SMiaohe Lin 	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE);
17981f862989SVitaly Wool 	ret = z3fold_mount();
17991f862989SVitaly Wool 	if (ret)
18001f862989SVitaly Wool 		return ret;
18011f862989SVitaly Wool 
18029a001fc1SVitaly Wool 	zpool_register_driver(&z3fold_zpool_driver);
18039a001fc1SVitaly Wool 
18049a001fc1SVitaly Wool 	return 0;
18059a001fc1SVitaly Wool }
18069a001fc1SVitaly Wool 
/* Module exit: undo init_z3fold() — unmount, then unregister the driver. */
static void __exit exit_z3fold(void)
{
	z3fold_unmount();
	zpool_unregister_driver(&z3fold_zpool_driver);
}
18129a001fc1SVitaly Wool 
18139a001fc1SVitaly Wool module_init(init_z3fold);
18149a001fc1SVitaly Wool module_exit(exit_z3fold);
18159a001fc1SVitaly Wool 
18169a001fc1SVitaly Wool MODULE_LICENSE("GPL");
18179a001fc1SVitaly Wool MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
18189a001fc1SVitaly Wool MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");
1819