xref: /linux/mm/z3fold.c (revision 3f9d2b5766aea06042630ac60b7316fd0cebf06f)
109c434b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
29a001fc1SVitaly Wool /*
39a001fc1SVitaly Wool  * z3fold.c
49a001fc1SVitaly Wool  *
59a001fc1SVitaly Wool  * Author: Vitaly Wool <vitaly.wool@konsulko.com>
69a001fc1SVitaly Wool  * Copyright (C) 2016, Sony Mobile Communications Inc.
79a001fc1SVitaly Wool  *
89a001fc1SVitaly Wool  * This implementation is based on zbud written by Seth Jennings.
99a001fc1SVitaly Wool  *
 * z3fold is a special purpose allocator for storing compressed pages. It
119a001fc1SVitaly Wool  * can store up to three compressed pages per page which improves the
129a001fc1SVitaly Wool  * compression ratio of zbud while retaining its main concepts (e. g. always
139a001fc1SVitaly Wool  * storing an integral number of objects per page) and simplicity.
149a001fc1SVitaly Wool  * It still has simple and deterministic reclaim properties that make it
159a001fc1SVitaly Wool  * preferable to a higher density approach (with no requirement on integral
 * number of objects per page) when reclaim is used.
179a001fc1SVitaly Wool  *
189a001fc1SVitaly Wool  * As in zbud, pages are divided into "chunks".  The size of the chunks is
199a001fc1SVitaly Wool  * fixed at compile time and is determined by NCHUNKS_ORDER below.
209a001fc1SVitaly Wool  *
219a001fc1SVitaly Wool  * z3fold doesn't export any API and is meant to be used via zpool API.
229a001fc1SVitaly Wool  */
239a001fc1SVitaly Wool 
249a001fc1SVitaly Wool #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
259a001fc1SVitaly Wool 
269a001fc1SVitaly Wool #include <linux/atomic.h>
27d30561c5SVitaly Wool #include <linux/sched.h>
281f862989SVitaly Wool #include <linux/cpumask.h>
299a001fc1SVitaly Wool #include <linux/list.h>
309a001fc1SVitaly Wool #include <linux/mm.h>
319a001fc1SVitaly Wool #include <linux/module.h>
321f862989SVitaly Wool #include <linux/page-flags.h>
331f862989SVitaly Wool #include <linux/migrate.h>
341f862989SVitaly Wool #include <linux/node.h>
351f862989SVitaly Wool #include <linux/compaction.h>
36d30561c5SVitaly Wool #include <linux/percpu.h>
371f862989SVitaly Wool #include <linux/mount.h>
38ea8157abSDavid Howells #include <linux/pseudo_fs.h>
391f862989SVitaly Wool #include <linux/fs.h>
409a001fc1SVitaly Wool #include <linux/preempt.h>
41d30561c5SVitaly Wool #include <linux/workqueue.h>
429a001fc1SVitaly Wool #include <linux/slab.h>
439a001fc1SVitaly Wool #include <linux/spinlock.h>
449a001fc1SVitaly Wool #include <linux/zpool.h>
45ea8157abSDavid Howells #include <linux/magic.h>
469a001fc1SVitaly Wool 
479a001fc1SVitaly Wool /*
489a001fc1SVitaly Wool  * NCHUNKS_ORDER determines the internal allocation granularity, effectively
499a001fc1SVitaly Wool  * adjusting internal fragmentation.  It also determines the number of
509a001fc1SVitaly Wool  * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
51ede93213SVitaly Wool  * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
52ede93213SVitaly Wool  * in the beginning of an allocated page are occupied by z3fold header, so
53ede93213SVitaly Wool  * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
54ede93213SVitaly Wool  * which shows the max number of free chunks in z3fold page, also there will
55ede93213SVitaly Wool  * be 63, or 62, respectively, freelists per pool.
569a001fc1SVitaly Wool  */
579a001fc1SVitaly Wool #define NCHUNKS_ORDER	6
589a001fc1SVitaly Wool 
599a001fc1SVitaly Wool #define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
609a001fc1SVitaly Wool #define CHUNK_SIZE	(1 << CHUNK_SHIFT)
61ede93213SVitaly Wool #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
62ede93213SVitaly Wool #define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
63ede93213SVitaly Wool #define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
649a001fc1SVitaly Wool #define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
659a001fc1SVitaly Wool 
66f201ebd8Szhong jiang #define BUDDY_MASK	(0x3)
67ca0246bbSVitaly Wool #define BUDDY_SHIFT	2
687c2b8baaSVitaly Wool #define SLOTS_ALIGN	(0x40)
697c2b8baaSVitaly Wool 
707c2b8baaSVitaly Wool /*****************
717c2b8baaSVitaly Wool  * Structures
727c2b8baaSVitaly Wool *****************/
struct z3fold_pool;
/*
 * struct z3fold_ops - user-supplied pool callbacks
 * @evict:	called to evict the object at @handle from @pool during
 *		reclaim.  NOTE(review): nonzero appears to signal failure —
 *		confirm against the reclaim path (not visible in this chunk).
 */
struct z3fold_ops {
	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
};

/* The three buddies a z3fold page can host, plus the headless case */
enum buddy {
	HEADLESS = 0,	/* page holds a single object and no z3fold header */
	FIRST,
	MIDDLE,
	LAST,
	BUDDIES_MAX = LAST
};

/*
 * struct z3fold_buddy_slots - indirection layer between handles and pages;
 * each slot stores an encoded (page address + buddy index) word, see
 * __encode_handle().
 */
struct z3fold_buddy_slots {
	/*
	 * we are using BUDDY_MASK in handle_to_buddy etc. so there should
	 * be enough slots to hold all possible variants
	 */
	unsigned long slot[BUDDY_MASK + 1];
	unsigned long pool; /* back link + flags */
};
#define HANDLE_FLAG_MASK	(0x03)
957c2b8baaSVitaly Wool 
/*
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page, except for HEADLESS pages
 * @buddy:		links the z3fold page into the relevant list in the
 *			pool
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @slots:		pointer to the structure holding buddy slots
 * @pool:		pointer to the containing pool
 * @cpu:		CPU which this page "belongs" to
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	the first chunk occupied by the middle buddy
 * @first_num:		the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 */
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_buddy_slots *slots;
	struct z3fold_pool *pool;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
	unsigned short mapped_count:2;
};
1289a001fc1SVitaly Wool 
/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied/lru lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
 *		buddies; the list each z3fold page is added to depends on
 *		the size of its free region.
 * @lru:	list tracking the z3fold pages in LRU order by most recently
 *		added buddy.
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool.
 * @c_handle:	cache for z3fold_buddy_slots allocation
 * @ops:	pointer to a structure of user defined operations specified at
 *		pool creation time.
 * @zpool:	zpool driver
 * @zpool_ops:	zpool operations structure with an evict callback
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @work:	work_struct for safe page release
 * @inode:	inode for z3fold pseudo filesystem
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular z3fold pool.
 */
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head *unbuddied;
	struct list_head lru;
	struct list_head stale;
	atomic64_t pages_nr;
	struct kmem_cache *c_handle;
	const struct z3fold_ops *ops;
	struct zpool *zpool;
	const struct zpool_ops *zpool_ops;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct work_struct work;
	struct inode *inode;
};
1699a001fc1SVitaly Wool 
/*
 * Internal z3fold page flags, stored as individual bits in page->private
 * (see the clear_bit()/test_bit() calls throughout this file).
 */
enum z3fold_page_flags {
	PAGE_HEADLESS = 0,	/* page holds one object and no z3fold header */
	MIDDLE_CHUNK_MAPPED,	/* middle buddy is mapped; must not be moved */
	NEEDS_COMPACTING,	/* layout compaction has been scheduled */
	PAGE_STALE,		/* page is queued for deferred release */
	PAGE_CLAIMED, /* by either reclaim or free */
};
1809a001fc1SVitaly Wool 
1819a001fc1SVitaly Wool /*****************
1829a001fc1SVitaly Wool  * Helpers
1839a001fc1SVitaly Wool *****************/
1849a001fc1SVitaly Wool 
1859a001fc1SVitaly Wool /* Converts an allocation size in bytes to size in z3fold chunks */
1869a001fc1SVitaly Wool static int size_to_chunks(size_t size)
1879a001fc1SVitaly Wool {
1889a001fc1SVitaly Wool 	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
1899a001fc1SVitaly Wool }
1909a001fc1SVitaly Wool 
1919a001fc1SVitaly Wool #define for_each_unbuddied_list(_iter, _begin) \
1929a001fc1SVitaly Wool 	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
1939a001fc1SVitaly Wool 
194d30561c5SVitaly Wool static void compact_page_work(struct work_struct *w);
195d30561c5SVitaly Wool 
196bb9f6f63SVitaly Wool static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
197bb9f6f63SVitaly Wool 							gfp_t gfp)
1987c2b8baaSVitaly Wool {
199f1549cb5SHenry Burns 	struct z3fold_buddy_slots *slots;
200f1549cb5SHenry Burns 
201f1549cb5SHenry Burns 	slots = kmem_cache_alloc(pool->c_handle,
202f1549cb5SHenry Burns 				 (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE)));
2037c2b8baaSVitaly Wool 
2047c2b8baaSVitaly Wool 	if (slots) {
2057c2b8baaSVitaly Wool 		memset(slots->slot, 0, sizeof(slots->slot));
2067c2b8baaSVitaly Wool 		slots->pool = (unsigned long)pool;
2077c2b8baaSVitaly Wool 	}
2087c2b8baaSVitaly Wool 
2097c2b8baaSVitaly Wool 	return slots;
2107c2b8baaSVitaly Wool }
2117c2b8baaSVitaly Wool 
2127c2b8baaSVitaly Wool static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
2137c2b8baaSVitaly Wool {
2147c2b8baaSVitaly Wool 	return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
2157c2b8baaSVitaly Wool }
2167c2b8baaSVitaly Wool 
2177c2b8baaSVitaly Wool static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
2187c2b8baaSVitaly Wool {
2197c2b8baaSVitaly Wool 	return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
2207c2b8baaSVitaly Wool }
2217c2b8baaSVitaly Wool 
2227c2b8baaSVitaly Wool static inline void free_handle(unsigned long handle)
2237c2b8baaSVitaly Wool {
2247c2b8baaSVitaly Wool 	struct z3fold_buddy_slots *slots;
2257c2b8baaSVitaly Wool 	int i;
2267c2b8baaSVitaly Wool 	bool is_free;
2277c2b8baaSVitaly Wool 
2287c2b8baaSVitaly Wool 	if (handle & (1 << PAGE_HEADLESS))
2297c2b8baaSVitaly Wool 		return;
2307c2b8baaSVitaly Wool 
2317c2b8baaSVitaly Wool 	WARN_ON(*(unsigned long *)handle == 0);
2327c2b8baaSVitaly Wool 	*(unsigned long *)handle = 0;
2337c2b8baaSVitaly Wool 	slots = handle_to_slots(handle);
2347c2b8baaSVitaly Wool 	is_free = true;
2357c2b8baaSVitaly Wool 	for (i = 0; i <= BUDDY_MASK; i++) {
2367c2b8baaSVitaly Wool 		if (slots->slot[i]) {
2377c2b8baaSVitaly Wool 			is_free = false;
2387c2b8baaSVitaly Wool 			break;
2397c2b8baaSVitaly Wool 		}
2407c2b8baaSVitaly Wool 	}
2417c2b8baaSVitaly Wool 
2427c2b8baaSVitaly Wool 	if (is_free) {
2437c2b8baaSVitaly Wool 		struct z3fold_pool *pool = slots_to_pool(slots);
2447c2b8baaSVitaly Wool 
2457c2b8baaSVitaly Wool 		kmem_cache_free(pool->c_handle, slots);
2467c2b8baaSVitaly Wool 	}
2477c2b8baaSVitaly Wool }
2487c2b8baaSVitaly Wool 
249ea8157abSDavid Howells static int z3fold_init_fs_context(struct fs_context *fc)
2501f862989SVitaly Wool {
251ea8157abSDavid Howells 	return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
2521f862989SVitaly Wool }
2531f862989SVitaly Wool 
2541f862989SVitaly Wool static struct file_system_type z3fold_fs = {
2551f862989SVitaly Wool 	.name		= "z3fold",
256ea8157abSDavid Howells 	.init_fs_context = z3fold_init_fs_context,
2571f862989SVitaly Wool 	.kill_sb	= kill_anon_super,
2581f862989SVitaly Wool };
2591f862989SVitaly Wool 
2601f862989SVitaly Wool static struct vfsmount *z3fold_mnt;
2611f862989SVitaly Wool static int z3fold_mount(void)
2621f862989SVitaly Wool {
2631f862989SVitaly Wool 	int ret = 0;
2641f862989SVitaly Wool 
2651f862989SVitaly Wool 	z3fold_mnt = kern_mount(&z3fold_fs);
2661f862989SVitaly Wool 	if (IS_ERR(z3fold_mnt))
2671f862989SVitaly Wool 		ret = PTR_ERR(z3fold_mnt);
2681f862989SVitaly Wool 
2691f862989SVitaly Wool 	return ret;
2701f862989SVitaly Wool }
2711f862989SVitaly Wool 
/* Tear down the mount created by z3fold_mount() */
static void z3fold_unmount(void)
{
	kern_unmount(z3fold_mnt);
}
2761f862989SVitaly Wool 
2771f862989SVitaly Wool static const struct address_space_operations z3fold_aops;
/*
 * Attach an anonymous inode to the pool whose mapping carries z3fold_aops,
 * presumably so the mm core can invoke our migration callbacks on pool
 * pages (callbacks not visible in this chunk — confirm).
 * Returns 0 on success, 1 (not a -errno) on failure.
 */
static int z3fold_register_migration(struct z3fold_pool *pool)
{
	pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb);
	if (IS_ERR(pool->inode)) {
		/* keep a NULL inode so z3fold_unregister_migration is safe */
		pool->inode = NULL;
		return 1;
	}

	pool->inode->i_mapping->private_data = pool;
	pool->inode->i_mapping->a_ops = &z3fold_aops;
	return 0;
}
2901f862989SVitaly Wool 
2911f862989SVitaly Wool static void z3fold_unregister_migration(struct z3fold_pool *pool)
2921f862989SVitaly Wool {
2931f862989SVitaly Wool 	if (pool->inode)
2941f862989SVitaly Wool 		iput(pool->inode);
2951f862989SVitaly Wool  }
2961f862989SVitaly Wool 
2979a001fc1SVitaly Wool /* Initializes the z3fold header of a newly allocated z3fold page */
298d30561c5SVitaly Wool static struct z3fold_header *init_z3fold_page(struct page *page,
299bb9f6f63SVitaly Wool 					struct z3fold_pool *pool, gfp_t gfp)
3009a001fc1SVitaly Wool {
3019a001fc1SVitaly Wool 	struct z3fold_header *zhdr = page_address(page);
302bb9f6f63SVitaly Wool 	struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);
3037c2b8baaSVitaly Wool 
3047c2b8baaSVitaly Wool 	if (!slots)
3057c2b8baaSVitaly Wool 		return NULL;
3069a001fc1SVitaly Wool 
3079a001fc1SVitaly Wool 	INIT_LIST_HEAD(&page->lru);
3089a001fc1SVitaly Wool 	clear_bit(PAGE_HEADLESS, &page->private);
3099a001fc1SVitaly Wool 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
310d30561c5SVitaly Wool 	clear_bit(NEEDS_COMPACTING, &page->private);
311d30561c5SVitaly Wool 	clear_bit(PAGE_STALE, &page->private);
312ca0246bbSVitaly Wool 	clear_bit(PAGE_CLAIMED, &page->private);
3139a001fc1SVitaly Wool 
3142f1e5e4dSVitaly Wool 	spin_lock_init(&zhdr->page_lock);
3155a27aa82SVitaly Wool 	kref_init(&zhdr->refcount);
3169a001fc1SVitaly Wool 	zhdr->first_chunks = 0;
3179a001fc1SVitaly Wool 	zhdr->middle_chunks = 0;
3189a001fc1SVitaly Wool 	zhdr->last_chunks = 0;
3199a001fc1SVitaly Wool 	zhdr->first_num = 0;
3209a001fc1SVitaly Wool 	zhdr->start_middle = 0;
321d30561c5SVitaly Wool 	zhdr->cpu = -1;
3227c2b8baaSVitaly Wool 	zhdr->slots = slots;
323bb9a374dSVitaly Wool 	zhdr->pool = pool;
3249a001fc1SVitaly Wool 	INIT_LIST_HEAD(&zhdr->buddy);
325d30561c5SVitaly Wool 	INIT_WORK(&zhdr->work, compact_page_work);
3269a001fc1SVitaly Wool 	return zhdr;
3279a001fc1SVitaly Wool }
3289a001fc1SVitaly Wool 
/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		/*
		 * Undo the movable marking (set elsewhere when the page was
		 * made migratable); must be done under the page lock.
		 */
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	ClearPagePrivate(page);
	__free_page(page);
}
3405a27aa82SVitaly Wool 
/* Lock a z3fold page (guards the z3fold_header fields) */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}

/* Try to lock a z3fold page; returns nonzero iff the lock was acquired */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}

/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}
3582f1e5e4dSVitaly Wool 
/*
 * Helper function to build the slot index for a buddy.  first_num rotates
 * the mapping: it is bumped when z3fold_compact_page() promotes the middle
 * buddy to first, so previously issued handles keep decoding correctly.
 */
static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
{
	return (bud + zhdr->first_num) & BUDDY_MASK;
}
3647c2b8baaSVitaly Wool 
/*
 * Encodes the handle of a particular buddy within a z3fold page.
 * Pool lock should be held as this function accesses first_num.
 * The encoded word is (page address | slot index), and for LAST also the
 * buddy size; it is stored in @slots and the slot's address is returned
 * as the opaque handle.
 */
static unsigned long __encode_handle(struct z3fold_header *zhdr,
				struct z3fold_buddy_slots *slots,
				enum buddy bud)
{
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	if (bud == LAST)
		/* stash the size in the low bits; see handle_to_chunks() */
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	slots->slot[idx] = h;
	return (unsigned long)&slots->slot[idx];
}
3929a001fc1SVitaly Wool 
/* Encode a handle for @bud using the page's own slots structure */
static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	return __encode_handle(zhdr, zhdr->slots, bud);
}
397*3f9d2b57SVitaly Wool 
3989a001fc1SVitaly Wool /* Returns the z3fold page where a given handle is stored */
3991f862989SVitaly Wool static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
4009a001fc1SVitaly Wool {
4011f862989SVitaly Wool 	unsigned long addr = h;
4027c2b8baaSVitaly Wool 
4037c2b8baaSVitaly Wool 	if (!(addr & (1 << PAGE_HEADLESS)))
4041f862989SVitaly Wool 		addr = *(unsigned long *)h;
4057c2b8baaSVitaly Wool 
4067c2b8baaSVitaly Wool 	return (struct z3fold_header *)(addr & PAGE_MASK);
4079a001fc1SVitaly Wool }
4089a001fc1SVitaly Wool 
/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	/* the size was packed into the low bits by __encode_handle() */
	unsigned long addr = *(unsigned long *)handle;

	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}
416ca0246bbSVitaly Wool 
/*
 * (handle & BUDDY_MASK) may be smaller than zhdr->first_num (see
 * encode_handle()), but that doesn't matter: masking with BUDDY_MASK
 * still yields the correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	unsigned long addr;

	/* headless handles carry no buddy index */
	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	/* undo the first_num rotation applied by __idx() */
	return (addr - zhdr->first_num) & BUDDY_MASK;
}
4329a001fc1SVitaly Wool 
/* Return the pool backpointer stored in the z3fold page header */
static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}
4379050cce1SVitaly Wool 
/*
 * Queue a dead z3fold page for deferred release: mark it stale, unlink it
 * from the LRU and put it on the pool's stale list for free_pages_work().
 * @locked: whether the caller holds the page lock (dropped here if so).
 */
static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	/* the caller must already have unlinked the page from unbuddied */
	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	spin_lock(&pool->lock);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);
	/* release the page lock before taking stale_lock */
	if (locked)
		z3fold_page_unlock(zhdr);
	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	/* the actual freeing happens asynchronously in free_pages_work() */
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);
}
457d30561c5SVitaly Wool 
/* kref release callback used when the caller does NOT hold the page lock */
static void __attribute__((__unused__))
			release_z3fold_page(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	__release_z3fold_page(zhdr, false);
}
465d30561c5SVitaly Wool 
/* kref release callback; the caller must hold the page lock */
static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	/* a successful trylock here would mean the lock was NOT held — bug */
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}
473d30561c5SVitaly Wool 
/*
 * kref release callback; the caller holds the page lock and the page may
 * still be on an unbuddied list, so unlink it under the pool lock first.
 */
static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
					       refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	/* a successful trylock here would mean the lock was NOT held — bug */
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}
486d30561c5SVitaly Wool 
/*
 * Workqueue handler: free every page on the pool's stale list.
 * stale_lock is dropped around the actual free because cancel_work_sync()
 * can sleep; the loop re-takes it before examining the list again.
 */
static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		spin_unlock(&pool->stale_lock);
		/* make sure no compaction work is still running on this page */
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}
508d30561c5SVitaly Wool 
5099a001fc1SVitaly Wool /*
5109a001fc1SVitaly Wool  * Returns the number of free chunks in a z3fold page.
5119a001fc1SVitaly Wool  * NB: can't be used with HEADLESS pages.
5129a001fc1SVitaly Wool  */
5139a001fc1SVitaly Wool static int num_free_chunks(struct z3fold_header *zhdr)
5149a001fc1SVitaly Wool {
5159a001fc1SVitaly Wool 	int nfree;
5169a001fc1SVitaly Wool 	/*
5179a001fc1SVitaly Wool 	 * If there is a middle object, pick up the bigger free space
5189a001fc1SVitaly Wool 	 * either before or after it. Otherwise just subtract the number
5199a001fc1SVitaly Wool 	 * of chunks occupied by the first and the last objects.
5209a001fc1SVitaly Wool 	 */
5219a001fc1SVitaly Wool 	if (zhdr->middle_chunks != 0) {
5229a001fc1SVitaly Wool 		int nfree_before = zhdr->first_chunks ?
523ede93213SVitaly Wool 			0 : zhdr->start_middle - ZHDR_CHUNKS;
5249a001fc1SVitaly Wool 		int nfree_after = zhdr->last_chunks ?
525ede93213SVitaly Wool 			0 : TOTAL_CHUNKS -
526ede93213SVitaly Wool 				(zhdr->start_middle + zhdr->middle_chunks);
5279a001fc1SVitaly Wool 		nfree = max(nfree_before, nfree_after);
5289a001fc1SVitaly Wool 	} else
5299a001fc1SVitaly Wool 		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
5309a001fc1SVitaly Wool 	return nfree;
5319a001fc1SVitaly Wool }
5329a001fc1SVitaly Wool 
/* Add to the appropriate unbuddied list */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	/* only pages with at least one free buddy count as "unbuddied" */
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		/* pin this CPU's list array; released by put_cpu_ptr below */
		struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);

		int freechunks = num_free_chunks(zhdr);
		spin_lock(&pool->lock);
		/* lists are bucketed by free-chunk count */
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		/* remember which CPU's list set the page went to */
		zhdr->cpu = smp_processor_id();
		put_cpu_ptr(pool->unbuddied);
	}
}
5499050cce1SVitaly Wool 
550ede93213SVitaly Wool static inline void *mchunk_memmove(struct z3fold_header *zhdr,
551ede93213SVitaly Wool 				unsigned short dst_chunk)
552ede93213SVitaly Wool {
553ede93213SVitaly Wool 	void *beg = zhdr;
554ede93213SVitaly Wool 	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
555ede93213SVitaly Wool 		       beg + (zhdr->start_middle << CHUNK_SHIFT),
556ede93213SVitaly Wool 		       zhdr->middle_chunks << CHUNK_SHIFT);
557ede93213SVitaly Wool }
558ede93213SVitaly Wool 
5591b096e5aSVitaly Wool #define BIG_CHUNK_GAP	3
/*
 * Squeeze the free space in a page together by relocating the middle
 * buddy.  Has to be called with the page lock held.
 * Returns 1 if data was moved, 0 otherwise.
 */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
		return 0; /* can't move middle chunk, it's used */

	/* don't shuffle data under a page that migration has isolated */
	if (unlikely(PageIsolated(page)))
		return 0;

	if (zhdr->middle_chunks == 0)
		return 0; /* nothing to compact */

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* move to the beginning */
		mchunk_memmove(zhdr, ZHDR_CHUNKS);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		/*
		 * rotate first_num so the previously issued MIDDLE handle
		 * still decodes correctly (see __idx()/handle_to_buddy())
		 */
		zhdr->first_num++;
		return 1;
	}

	/*
	 * moving data is expensive, so let's only do that if
	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
	 */
	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		/* shift the middle buddy down, right after the first one */
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		/* shift the middle buddy up, right below the last one */
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}
6071b096e5aSVitaly Wool 
/*
 * Perform the deferred compaction of one page: take it off its list,
 * compact its layout and re-add it to the proper unbuddied list.
 * @locked: whether the caller already holds the page lock.
 */
static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		/* sanity check: trylock must fail if the lock is held */
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	/*
	 * drop a reference (presumably taken when the compaction was
	 * scheduled — the scheduling site is not visible here); if it was
	 * the last one, the page is released and we are done
	 */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}

	/* leave pages alone that migration, reclaim or free are handling */
	if (unlikely(PageIsolated(page) ||
		     test_bit(PAGE_CLAIMED, &page->private) ||
		     test_bit(PAGE_STALE, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	z3fold_page_unlock(zhdr);
}
642d30561c5SVitaly Wool 
643d30561c5SVitaly Wool static void compact_page_work(struct work_struct *w)
644d30561c5SVitaly Wool {
645d30561c5SVitaly Wool 	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
646d30561c5SVitaly Wool 						work);
647d30561c5SVitaly Wool 
648d30561c5SVitaly Wool 	do_compact_page(zhdr, false);
649d30561c5SVitaly Wool }
650d30561c5SVitaly Wool 
/*
 * Try to find an existing unbuddied z3fold page that can hold @size bytes:
 * first a best-fit scan of this CPU's per-cpu unbuddied lists, then, if
 * that fails, an exact-size match on the other CPUs' lists.
 *
 * Returns a _locked_ z3fold page header with an extra reference taken,
 * or NULL if no suitable page was found.
 */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = get_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		/* lockless peek; validated under pool->lock just below */
		zhdr = list_first_entry_or_null(READ_ONCE(l),
					struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		l = &unbuddied[i];
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
						struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			/* lost the race for this entry — restart the scan */
			spin_unlock(&pool->lock);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		/* taken off a per-cpu list: no longer owned by any CPU */
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		/* skip pages that are queued for compaction */
		if (test_bit(NEEDS_COMPACTING, &page->private)) {
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	put_cpu_ptr(pool->unbuddied);

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
						struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			/* safe for the same reason as the kref_get() above */
			kref_get(&zhdr->refcount);
			break;
		}
	}

	return zhdr;
}
748d30561c5SVitaly Wool 
749d30561c5SVitaly Wool /*
750d30561c5SVitaly Wool  * API Functions
751d30561c5SVitaly Wool  */
752d30561c5SVitaly Wool 
/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 * @ops:	user-defined operations for the z3fold pool
 *
 * Return: pointer to the new z3fold pool or NULL if the metadata allocation
 * failed.
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
		const struct z3fold_ops *ops)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	/* slab cache for the per-allocation handle slots */
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	/* per-cpu array of NCHUNKS unbuddied list heads */
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->lru);
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	if (z3fold_register_migration(pool))
		goto out_rwq;
	INIT_WORK(&pool->work, free_pages_work);
	pool->ops = ops;
	return pool;

	/* error unwinding: release resources in reverse acquisition order */
out_rwq:
	destroy_workqueue(pool->release_wq);
out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}
816d30561c5SVitaly Wool 
817d30561c5SVitaly Wool /**
818d30561c5SVitaly Wool  * z3fold_destroy_pool() - destroys an existing z3fold pool
819d30561c5SVitaly Wool  * @pool:	the z3fold pool to be destroyed
820d30561c5SVitaly Wool  *
821d30561c5SVitaly Wool  * The pool should be emptied before this function is called.
822d30561c5SVitaly Wool  */
823d30561c5SVitaly Wool static void z3fold_destroy_pool(struct z3fold_pool *pool)
824d30561c5SVitaly Wool {
8257c2b8baaSVitaly Wool 	kmem_cache_destroy(pool->c_handle);
8266051d3bdSHenry Burns 
8276051d3bdSHenry Burns 	/*
8286051d3bdSHenry Burns 	 * We need to destroy pool->compact_wq before pool->release_wq,
8296051d3bdSHenry Burns 	 * as any pending work on pool->compact_wq will call
8306051d3bdSHenry Burns 	 * queue_work(pool->release_wq, &pool->work).
831b997052bSHenry Burns 	 *
832b997052bSHenry Burns 	 * There are still outstanding pages until both workqueues are drained,
833b997052bSHenry Burns 	 * so we cannot unregister migration until then.
8346051d3bdSHenry Burns 	 */
8356051d3bdSHenry Burns 
836d30561c5SVitaly Wool 	destroy_workqueue(pool->compact_wq);
8376051d3bdSHenry Burns 	destroy_workqueue(pool->release_wq);
838b997052bSHenry Burns 	z3fold_unregister_migration(pool);
839d30561c5SVitaly Wool 	kfree(pool);
840d30561c5SVitaly Wool }
841d30561c5SVitaly Wool 
/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request.  A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
 * as z3fold pool pages.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
 * a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size)
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	/* too big to share a page with a header: use a whole "headless" page */
	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		bud = HEADLESS;
	else {
retry:
		/* returns a locked header with an extra reference, or NULL */
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			/* pick whichever buddy slot of the page is free */
			if (zhdr->first_chunks == 0) {
				if (zhdr->middle_chunks != 0 &&
				    chunks >= zhdr->start_middle)
					bud = LAST;
				else
					bud = FIRST;
			} else if (zhdr->last_chunks == 0)
				bud = LAST;
			else if (zhdr->middle_chunks == 0)
				bud = MIDDLE;
			else {
				/*
				 * A fully occupied page should never have been
				 * on an unbuddied list: drop our reference and
				 * complain, then retry the lookup.
				 */
				if (kref_put(&zhdr->refcount,
					     release_z3fold_page_locked))
					atomic64_dec(&pool->pages_nr);
				else
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = NULL;
	if (can_sleep) {
		spin_lock(&pool->stale_lock);
		zhdr = list_first_entry_or_null(&pool->stale,
						struct z3fold_header, buddy);
		/*
		 * Before allocating a page, let's see if we can take one from
		 * the stale pages list. cancel_work_sync() can sleep so we
		 * limit this case to the contexts where we can sleep
		 */
		if (zhdr) {
			list_del(&zhdr->buddy);
			spin_unlock(&pool->stale_lock);
			cancel_work_sync(&zhdr->work);
			page = virt_to_page(zhdr);
		} else {
			spin_unlock(&pool->stale_lock);
		}
	}
	if (!page)
		page = alloc_page(gfp);

	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	/*
	 * Mark the page movable for compaction/migration.  lock_page() can
	 * sleep, so in atomic context we only do it if trylock succeeds;
	 * otherwise the page simply stays non-movable.
	 */
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, pool->inode->i_mapping);
		unlock_page(page);
	} else {
		if (trylock_page(page)) {
			__SetPageMovable(page, pool->inode->i_mapping);
			unlock_page(page);
		}
	}
	z3fold_page_lock(zhdr);

found:
	/* record the allocation size in the chosen buddy slot */
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	/* Add/move z3fold page to beginning of LRU */
	if (!list_empty(&page->lru))
		list_del(&page->lru);

	list_add(&page->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}
9829a001fc1SVitaly Wool 
/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is under
 * reclaim, as indicated by the PG_reclaim flag being set, this function
 * only sets the first|last_chunks to 0.  The page is actually freed
 * once both buddies are evicted (see z3fold_reclaim_page() below).
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;

	zhdr = handle_to_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
			spin_lock(&pool->lock);
			list_del(&page->lru);
			spin_unlock(&pool->lock);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	z3fold_page_lock(zhdr);
	bud = handle_to_buddy(handle);

	/* mark the freed buddy's slot empty */
	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		z3fold_page_unlock(zhdr);
		return;
	}

	free_handle(handle);
	/* drop this allocation's reference; page may be fully released here */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	/* page was claimed by reclaim — let reclaim finish with it */
	if (test_bit(PAGE_CLAIMED, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	/* skip compaction if the page is isolated or already queued for it */
	if (unlikely(PageIsolated(page)) ||
	    test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		/* no live owning CPU: compact synchronously, lock held */
		spin_lock(&pool->lock);
		list_del_init(&zhdr->buddy);
		spin_unlock(&pool->lock);
		zhdr->cpu = -1;
		/* reference for do_compact_page(), dropped there */
		kref_get(&zhdr->refcount);
		do_compact_page(zhdr, true);
		return;
	}
	/* reference for the queued compaction work, dropped there */
	kref_get(&zhdr->refcount);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	z3fold_page_unlock(zhdr);
}
10669a001fc1SVitaly Wool 
/**
 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool:	pool from which a page will attempt to be evicted
 * @retries:	number of pages on the LRU list for which eviction will
 *		be attempted before failing
 *
 * z3fold reclaim is different from normal system reclaim in that it is done
 * from the bottom, up. This is because only the bottom layer, z3fold, has
 * information on how the allocations are organized within each z3fold page.
 * This has the potential to create interesting locking situations between
 * z3fold and the user, however.
 *
 * To avoid these, this is how z3fold_reclaim_page() should be called:
 *
 * The user detects a page should be reclaimed and calls z3fold_reclaim_page().
 * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and
 * call the user-defined eviction handler with the pool and handle as
 * arguments.
 *
 * If the handle can not be evicted, the eviction handler should return
 * non-zero. z3fold_reclaim_page() will add the z3fold page back to the
 * appropriate list and try the next z3fold page on the LRU up to
 * a user defined number of retries.
 *
 * If the handle is successfully evicted, the eviction handler should
 * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free()
 * contains logic to delay freeing the page if the page is under reclaim,
 * as indicated by the setting of the PG_reclaim flag on the underlying page.
 *
 * If all buddies in the z3fold page are successfully evicted, then the
 * z3fold page can be freed.
 *
 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
 * no pages to evict or an eviction handler is not registered, -EAGAIN if
 * the retry limit was hit.
 */
static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
{
	int i, ret = 0;
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	struct list_head *pos;
	/* local copy of the handle slots; see the comment further down */
	struct z3fold_buddy_slots slots;
	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		/* scan the LRU from its tail (coldest pages first) */
		list_for_each_prev(pos, &pool->lru) {
			page = list_entry(pos, struct page, lru);

			/* this bit could have been set by free, in which case
			 * we pass over to the next page in the pool.
			 */
			if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
				page = NULL;
				continue;
			}

			/* isolated for migration — undo the claim, move on */
			if (unlikely(PageIsolated(page))) {
				clear_bit(PAGE_CLAIMED, &page->private);
				page = NULL;
				continue;
			}
			zhdr = page_address(page);
			/* headless pages have no header lock to take */
			if (test_bit(PAGE_HEADLESS, &page->private))
				break;

			if (!z3fold_page_trylock(zhdr)) {
				clear_bit(PAGE_CLAIMED, &page->private);
				zhdr = NULL;
				continue; /* can't evict at this point */
			}
			/* hold a reference across the unlocked evict calls */
			kref_get(&zhdr->refcount);
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			break;
		}

		if (!zhdr)
			break;

		list_del_init(&page->lru);
		spin_unlock(&pool->lock);

		if (!test_bit(PAGE_HEADLESS, &page->private)) {
			/*
			 * We need encode the handles before unlocking, and
			 * use our local slots structure because z3fold_free
			 * can zero out zhdr->slots and we can't do much
			 * about that
			 */
			first_handle = 0;
			last_handle = 0;
			middle_handle = 0;
			if (zhdr->first_chunks)
				first_handle = __encode_handle(zhdr, &slots,
								FIRST);
			if (zhdr->middle_chunks)
				middle_handle = __encode_handle(zhdr, &slots,
								MIDDLE);
			if (zhdr->last_chunks)
				last_handle = __encode_handle(zhdr, &slots,
								LAST);
			/*
			 * it's safe to unlock here because we hold a
			 * reference to this page
			 */
			z3fold_page_unlock(zhdr);
		} else {
			first_handle = __encode_handle(zhdr, &slots, HEADLESS);
			last_handle = middle_handle = 0;
		}

		/* Issue the eviction callback(s) */
		if (middle_handle) {
			ret = pool->ops->evict(pool, middle_handle);
			if (ret)
				goto next;
		}
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
		}
next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				/* headless page fully evicted — free it now */
				free_z3fold_page(page, true);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/* eviction failed: put the page back on the LRU */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			clear_bit(PAGE_CLAIMED, &page->private);
		} else {
			z3fold_page_lock(zhdr);
			/* drop our reference; last put means page is free */
			if (kref_put(&zhdr->refcount,
					release_z3fold_page_locked)) {
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/*
			 * if we are here, the page is still not completely
			 * free. Take the global pool lock then to be able
			 * to add it back to the lru list
			 */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			z3fold_page_unlock(zhdr);
			clear_bit(PAGE_CLAIMED, &page->private);
		}

		/* We started off locked so we need to lock the pool back */
		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}
12409a001fc1SVitaly Wool 
12419a001fc1SVitaly Wool /**
12429a001fc1SVitaly Wool  * z3fold_map() - maps the allocation associated with the given handle
12439a001fc1SVitaly Wool  * @pool:	pool in which the allocation resides
12449a001fc1SVitaly Wool  * @handle:	handle associated with the allocation to be mapped
12459a001fc1SVitaly Wool  *
12469a001fc1SVitaly Wool  * Extracts the buddy number from handle and constructs the pointer to the
12479a001fc1SVitaly Wool  * correct starting chunk within the page.
12489a001fc1SVitaly Wool  *
12499a001fc1SVitaly Wool  * Returns: a pointer to the mapped allocation
12509a001fc1SVitaly Wool  */
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	zhdr = handle_to_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	/* headless pages hold a single object at offset 0; no locking needed */
	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	z3fold_page_lock(zhdr);
	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		/* the first object starts right after the (aligned) header */
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		/* flag the middle chunk so it is not moved while mapped */
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		/* the last object is laid out back from the end of the page */
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	/*
	 * A non-zero mapped_count keeps the page from being isolated for
	 * migration (see z3fold_page_isolate()).
	 */
	if (addr)
		zhdr->mapped_count++;
	z3fold_page_unlock(zhdr);
out:
	return addr;
}
12919a001fc1SVitaly Wool 
12929a001fc1SVitaly Wool /**
12939a001fc1SVitaly Wool  * z3fold_unmap() - unmaps the allocation associated with the given handle
12949a001fc1SVitaly Wool  * @pool:	pool in which the allocation resides
12959a001fc1SVitaly Wool  * @handle:	handle associated with the allocation to be unmapped
12969a001fc1SVitaly Wool  */
12979a001fc1SVitaly Wool static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
12989a001fc1SVitaly Wool {
12999a001fc1SVitaly Wool 	struct z3fold_header *zhdr;
13009a001fc1SVitaly Wool 	struct page *page;
13019a001fc1SVitaly Wool 	enum buddy buddy;
13029a001fc1SVitaly Wool 
13039a001fc1SVitaly Wool 	zhdr = handle_to_z3fold_header(handle);
13049a001fc1SVitaly Wool 	page = virt_to_page(zhdr);
13059a001fc1SVitaly Wool 
13062f1e5e4dSVitaly Wool 	if (test_bit(PAGE_HEADLESS, &page->private))
13079a001fc1SVitaly Wool 		return;
13089a001fc1SVitaly Wool 
13092f1e5e4dSVitaly Wool 	z3fold_page_lock(zhdr);
13109a001fc1SVitaly Wool 	buddy = handle_to_buddy(handle);
13119a001fc1SVitaly Wool 	if (buddy == MIDDLE)
13129a001fc1SVitaly Wool 		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
13131f862989SVitaly Wool 	zhdr->mapped_count--;
13142f1e5e4dSVitaly Wool 	z3fold_page_unlock(zhdr);
13159a001fc1SVitaly Wool }
13169a001fc1SVitaly Wool 
13179a001fc1SVitaly Wool /**
13189a001fc1SVitaly Wool  * z3fold_get_pool_size() - gets the z3fold pool size in pages
13199a001fc1SVitaly Wool  * @pool:	pool whose size is being queried
13209a001fc1SVitaly Wool  *
132112d59ae6SVitaly Wool  * Returns: size in pages of the given pool.
13229a001fc1SVitaly Wool  */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	/* pages_nr is maintained atomically by the alloc/free/reclaim paths */
	return atomic64_read(&pool->pages_nr);
}
13279a001fc1SVitaly Wool 
13281f862989SVitaly Wool static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	/*
	 * Headless pages have no z3fold header to operate on; claimed pages
	 * are under reclaim and must not be migrated concurrently.
	 */
	if (test_bit(PAGE_HEADLESS, &page->private) ||
	    test_bit(PAGE_CLAIMED, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	/* pages queued for compaction or already stale cannot be isolated */
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	pool = zhdr_to_pool(zhdr);

	/* only isolate when no object in this page is currently mapped */
	if (zhdr->mapped_count == 0) {
		/* reference is dropped in z3fold_page_putback() on failure */
		kref_get(&zhdr->refcount);
		if (!list_empty(&zhdr->buddy))
			list_del_init(&zhdr->buddy);
		spin_lock(&pool->lock);
		if (!list_empty(&page->lru))
			list_del(&page->lru);
		spin_unlock(&pool->lock);
		z3fold_page_unlock(zhdr);
		return true;
	}
out:
	z3fold_page_unlock(zhdr);
	return false;
}
13631f862989SVitaly Wool 
13641f862989SVitaly Wool static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
13651f862989SVitaly Wool 			       struct page *page, enum migrate_mode mode)
13661f862989SVitaly Wool {
13671f862989SVitaly Wool 	struct z3fold_header *zhdr, *new_zhdr;
13681f862989SVitaly Wool 	struct z3fold_pool *pool;
13691f862989SVitaly Wool 	struct address_space *new_mapping;
13701f862989SVitaly Wool 
13711f862989SVitaly Wool 	VM_BUG_ON_PAGE(!PageMovable(page), page);
13721f862989SVitaly Wool 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
1373810481a2SHenry Burns 	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
13741f862989SVitaly Wool 
13751f862989SVitaly Wool 	zhdr = page_address(page);
13761f862989SVitaly Wool 	pool = zhdr_to_pool(zhdr);
13771f862989SVitaly Wool 
13781f862989SVitaly Wool 	if (!z3fold_page_trylock(zhdr)) {
13791f862989SVitaly Wool 		return -EAGAIN;
13801f862989SVitaly Wool 	}
13811f862989SVitaly Wool 	if (zhdr->mapped_count != 0) {
13821f862989SVitaly Wool 		z3fold_page_unlock(zhdr);
13831f862989SVitaly Wool 		return -EBUSY;
13841f862989SVitaly Wool 	}
1385c92d2f38SHenry Burns 	if (work_pending(&zhdr->work)) {
1386c92d2f38SHenry Burns 		z3fold_page_unlock(zhdr);
1387c92d2f38SHenry Burns 		return -EAGAIN;
1388c92d2f38SHenry Burns 	}
13891f862989SVitaly Wool 	new_zhdr = page_address(newpage);
13901f862989SVitaly Wool 	memcpy(new_zhdr, zhdr, PAGE_SIZE);
13911f862989SVitaly Wool 	newpage->private = page->private;
13921f862989SVitaly Wool 	page->private = 0;
13931f862989SVitaly Wool 	z3fold_page_unlock(zhdr);
13941f862989SVitaly Wool 	spin_lock_init(&new_zhdr->page_lock);
1395c92d2f38SHenry Burns 	INIT_WORK(&new_zhdr->work, compact_page_work);
1396c92d2f38SHenry Burns 	/*
1397c92d2f38SHenry Burns 	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
1398c92d2f38SHenry Burns 	 * so we only have to reinitialize it.
1399c92d2f38SHenry Burns 	 */
1400c92d2f38SHenry Burns 	INIT_LIST_HEAD(&new_zhdr->buddy);
14011f862989SVitaly Wool 	new_mapping = page_mapping(page);
14021f862989SVitaly Wool 	__ClearPageMovable(page);
14031f862989SVitaly Wool 	ClearPagePrivate(page);
14041f862989SVitaly Wool 
14051f862989SVitaly Wool 	get_page(newpage);
14061f862989SVitaly Wool 	z3fold_page_lock(new_zhdr);
14071f862989SVitaly Wool 	if (new_zhdr->first_chunks)
14081f862989SVitaly Wool 		encode_handle(new_zhdr, FIRST);
14091f862989SVitaly Wool 	if (new_zhdr->last_chunks)
14101f862989SVitaly Wool 		encode_handle(new_zhdr, LAST);
14111f862989SVitaly Wool 	if (new_zhdr->middle_chunks)
14121f862989SVitaly Wool 		encode_handle(new_zhdr, MIDDLE);
14131f862989SVitaly Wool 	set_bit(NEEDS_COMPACTING, &newpage->private);
14141f862989SVitaly Wool 	new_zhdr->cpu = smp_processor_id();
14151f862989SVitaly Wool 	spin_lock(&pool->lock);
14161f862989SVitaly Wool 	list_add(&newpage->lru, &pool->lru);
14171f862989SVitaly Wool 	spin_unlock(&pool->lock);
14181f862989SVitaly Wool 	__SetPageMovable(newpage, new_mapping);
14191f862989SVitaly Wool 	z3fold_page_unlock(new_zhdr);
14201f862989SVitaly Wool 
14211f862989SVitaly Wool 	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
14221f862989SVitaly Wool 
14231f862989SVitaly Wool 	page_mapcount_reset(page);
14241f862989SVitaly Wool 	put_page(page);
14251f862989SVitaly Wool 	return 0;
14261f862989SVitaly Wool }
14271f862989SVitaly Wool 
/* Return an isolated page to the pool; called when migration did not happen */
static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	/*
	 * Drop the reference taken in z3fold_page_isolate(); if it was the
	 * last one, the release callback has freed the page under the lock.
	 */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	spin_unlock(&pool->lock);
	z3fold_page_unlock(zhdr);
}
14491f862989SVitaly Wool 
/* Movable-page callbacks used by the page migration core */
static const struct address_space_operations z3fold_aops = {
	.isolate_page = z3fold_page_isolate,
	.migratepage = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};
14551f862989SVitaly Wool 
14569a001fc1SVitaly Wool /*****************
14579a001fc1SVitaly Wool  * zpool
14589a001fc1SVitaly Wool  ****************/
14599a001fc1SVitaly Wool 
14609a001fc1SVitaly Wool static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
14619a001fc1SVitaly Wool {
14629a001fc1SVitaly Wool 	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
14639a001fc1SVitaly Wool 		return pool->zpool_ops->evict(pool->zpool, handle);
14649a001fc1SVitaly Wool 	else
14659a001fc1SVitaly Wool 		return -ENOENT;
14669a001fc1SVitaly Wool }
14679a001fc1SVitaly Wool 
/* Eviction hook handed to the z3fold core on pool creation */
static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};
14719a001fc1SVitaly Wool 
14729a001fc1SVitaly Wool static void *z3fold_zpool_create(const char *name, gfp_t gfp,
14739a001fc1SVitaly Wool 			       const struct zpool_ops *zpool_ops,
14749a001fc1SVitaly Wool 			       struct zpool *zpool)
14759a001fc1SVitaly Wool {
14769a001fc1SVitaly Wool 	struct z3fold_pool *pool;
14779a001fc1SVitaly Wool 
1478d30561c5SVitaly Wool 	pool = z3fold_create_pool(name, gfp,
1479d30561c5SVitaly Wool 				zpool_ops ? &z3fold_zpool_ops : NULL);
14809a001fc1SVitaly Wool 	if (pool) {
14819a001fc1SVitaly Wool 		pool->zpool = zpool;
14829a001fc1SVitaly Wool 		pool->zpool_ops = zpool_ops;
14839a001fc1SVitaly Wool 	}
14849a001fc1SVitaly Wool 	return pool;
14859a001fc1SVitaly Wool }
14869a001fc1SVitaly Wool 
/* zpool glue: forward pool destruction to the z3fold implementation */
static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}
14919a001fc1SVitaly Wool 
/* zpool glue: forward allocation to the z3fold implementation */
static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
/* zpool glue: forward freeing to the z3fold implementation */
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}
15019a001fc1SVitaly Wool 
15029a001fc1SVitaly Wool static int z3fold_zpool_shrink(void *pool, unsigned int pages,
15039a001fc1SVitaly Wool 			unsigned int *reclaimed)
15049a001fc1SVitaly Wool {
15059a001fc1SVitaly Wool 	unsigned int total = 0;
15069a001fc1SVitaly Wool 	int ret = -EINVAL;
15079a001fc1SVitaly Wool 
15089a001fc1SVitaly Wool 	while (total < pages) {
15099a001fc1SVitaly Wool 		ret = z3fold_reclaim_page(pool, 8);
15109a001fc1SVitaly Wool 		if (ret < 0)
15119a001fc1SVitaly Wool 			break;
15129a001fc1SVitaly Wool 		total++;
15139a001fc1SVitaly Wool 	}
15149a001fc1SVitaly Wool 
15159a001fc1SVitaly Wool 	if (reclaimed)
15169a001fc1SVitaly Wool 		*reclaimed = total;
15179a001fc1SVitaly Wool 
15189a001fc1SVitaly Wool 	return ret;
15199a001fc1SVitaly Wool }
15209a001fc1SVitaly Wool 
/* zpool glue: map a handle; the mapmode hint is not needed by z3fold */
static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
/* zpool glue: forward unmapping to the z3fold implementation */
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}
15309a001fc1SVitaly Wool 
/* zpool glue: report the pool footprint in bytes rather than pages */
static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}
15359a001fc1SVitaly Wool 
/* Registration record exposing z3fold through the generic zpool API */
static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.shrink =	z3fold_zpool_shrink,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_size =	z3fold_zpool_total_size,
};
15489a001fc1SVitaly Wool 
15499a001fc1SVitaly Wool MODULE_ALIAS("zpool-z3fold");
15509a001fc1SVitaly Wool 
15519a001fc1SVitaly Wool static int __init init_z3fold(void)
15529a001fc1SVitaly Wool {
15531f862989SVitaly Wool 	int ret;
15541f862989SVitaly Wool 
1555ede93213SVitaly Wool 	/* Make sure the z3fold header is not larger than the page size */
1556ede93213SVitaly Wool 	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
15571f862989SVitaly Wool 	ret = z3fold_mount();
15581f862989SVitaly Wool 	if (ret)
15591f862989SVitaly Wool 		return ret;
15601f862989SVitaly Wool 
15619a001fc1SVitaly Wool 	zpool_register_driver(&z3fold_zpool_driver);
15629a001fc1SVitaly Wool 
15639a001fc1SVitaly Wool 	return 0;
15649a001fc1SVitaly Wool }
15659a001fc1SVitaly Wool 
static void __exit exit_z3fold(void)
{
	/* tear down in reverse order of init_z3fold() */
	z3fold_unmount();
	zpool_unregister_driver(&z3fold_zpool_driver);
}
15719a001fc1SVitaly Wool 
15729a001fc1SVitaly Wool module_init(init_z3fold);
15739a001fc1SVitaly Wool module_exit(exit_z3fold);
15749a001fc1SVitaly Wool 
15759a001fc1SVitaly Wool MODULE_LICENSE("GPL");
15769a001fc1SVitaly Wool MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
15779a001fc1SVitaly Wool MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");
1578