xref: /linux/mm/z3fold.c (revision 351618b203acef13946a03ecf18fbe328c3cdb58)
19a001fc1SVitaly Wool /*
29a001fc1SVitaly Wool  * z3fold.c
39a001fc1SVitaly Wool  *
49a001fc1SVitaly Wool  * Author: Vitaly Wool <vitaly.wool@konsulko.com>
59a001fc1SVitaly Wool  * Copyright (C) 2016, Sony Mobile Communications Inc.
69a001fc1SVitaly Wool  *
79a001fc1SVitaly Wool  * This implementation is based on zbud written by Seth Jennings.
89a001fc1SVitaly Wool  *
 * z3fold is a special purpose allocator for storing compressed pages. It
 * can store up to three compressed pages per page which improves the
 * compression ratio of zbud while retaining its main concepts (e.g. always
 * storing an integral number of objects per page) and simplicity.
 * It still has simple and deterministic reclaim properties that make it
 * preferable to a higher density approach (with no requirement on integral
 * number of objects per page) when reclaim is used.
169a001fc1SVitaly Wool  *
179a001fc1SVitaly Wool  * As in zbud, pages are divided into "chunks".  The size of the chunks is
189a001fc1SVitaly Wool  * fixed at compile time and is determined by NCHUNKS_ORDER below.
199a001fc1SVitaly Wool  *
209a001fc1SVitaly Wool  * z3fold doesn't export any API and is meant to be used via zpool API.
219a001fc1SVitaly Wool  */
229a001fc1SVitaly Wool 
239a001fc1SVitaly Wool #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
249a001fc1SVitaly Wool 
259a001fc1SVitaly Wool #include <linux/atomic.h>
26d30561c5SVitaly Wool #include <linux/sched.h>
279a001fc1SVitaly Wool #include <linux/list.h>
289a001fc1SVitaly Wool #include <linux/mm.h>
299a001fc1SVitaly Wool #include <linux/module.h>
30d30561c5SVitaly Wool #include <linux/percpu.h>
319a001fc1SVitaly Wool #include <linux/preempt.h>
32d30561c5SVitaly Wool #include <linux/workqueue.h>
339a001fc1SVitaly Wool #include <linux/slab.h>
349a001fc1SVitaly Wool #include <linux/spinlock.h>
359a001fc1SVitaly Wool #include <linux/zpool.h>
369a001fc1SVitaly Wool 
379a001fc1SVitaly Wool /*****************
389a001fc1SVitaly Wool  * Structures
399a001fc1SVitaly Wool *****************/
/* Forward declaration: the callbacks below take a pointer to the pool. */
struct z3fold_pool;

/*
 * struct z3fold_ops - user-defined operations for a z3fold pool
 * @evict:	callback that receives the pool and an allocation handle and
 *		returns an int status.
 *		NOTE(review): presumably invoked by pool reclaim to evict the
 *		object behind @handle - confirm against the caller.
 */
struct z3fold_ops {
	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
};
44ede93213SVitaly Wool 
/*
 * Identifies the slot ("buddy") inside a z3fold page that an object
 * occupies. HEADLESS is the value for which encode_handle() stores no buddy
 * bits in the handle. BUDDIES_MAX is the number of enumerators, not a slot.
 */
enum buddy {
	HEADLESS	= 0,
	FIRST		= 1,
	MIDDLE		= 2,
	LAST		= 3,
	BUDDIES_MAX	= 4
};
52ede93213SVitaly Wool 
/*
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page, except for HEADLESS pages
 * @buddy:		links the z3fold page into the relevant list in the
 *			pool
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @pool:		pointer to the pool which this page belongs to
 * @cpu:		CPU which this page "belongs" to, or -1 if none
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	chunk index, counted from the start of the page, at
 *			which the middle buddy begins
 * @first_num:		the starting number (for the first handle); two bits
 *			wide, matching BUDDY_MASK
 *
 * The size of this structure determines ZHDR_SIZE_ALIGNED and therefore
 * NCHUNKS, so adding fields changes how many chunks a page can hold.
 */
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_pool *pool;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
};
81ede93213SVitaly Wool 
/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation.  It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
 * in the beginning of an allocated page are occupied by z3fold header, so
 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
 * which shows the max number of free chunks in z3fold page, also there will
 * be 63, or 62, respectively, freelists per pool.
 */
#define NCHUNKS_ORDER	6

/* log2 of the chunk size in bytes */
#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
/* chunk size in bytes */
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
/* size of the z3fold header rounded up to a whole number of chunks */
#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
/* number of chunks occupied by the header */
#define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
/* number of chunks in a whole page, header included */
#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
/* number of chunks left in a page once the header is accounted for */
#define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)

/* the low two bits of a handle carry the (first_num-adjusted) buddy number */
#define BUDDY_MASK	(0x3)
/* for LAST buddies, last_chunks is stored in the handle shifted by this */
#define BUDDY_SHIFT	2
1039a001fc1SVitaly Wool 
/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied/lru lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
 *		buddies; the list each z3fold page is added to depends on
 *		the size of its free region.
 * @lru:	list tracking the z3fold pages in LRU order by most recently
 *		added buddy.
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool.
 * @ops:	pointer to a structure of user defined operations specified at
 *		pool creation time.
 * @zpool:	NOTE(review): presumably the zpool this pool is registered
 *		with - not set in the visible code, confirm.
 * @zpool_ops:	NOTE(review): presumably the zpool-level callbacks - not set
 *		in the visible code, confirm.
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @work:	work_struct for safe page release
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular z3fold pool.
 */
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head *unbuddied;
	struct list_head lru;
	struct list_head stale;
	atomic64_t pages_nr;
	const struct z3fold_ops *ops;
	struct zpool *zpool;
	const struct zpool_ops *zpool_ops;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct work_struct work;
};
1409a001fc1SVitaly Wool 
/*
 * Internal z3fold page flags.
 *
 * These are bit numbers used with set_bit()/clear_bit()/test_bit() on
 * page->private.
 */
enum z3fold_page_flags {
	/* cleared for every page that gets a z3fold header */
	PAGE_HEADLESS		= 0,
	/* middle buddy is in use, so compaction must not move it */
	MIDDLE_CHUNK_MAPPED	= 1,
	/* page is waiting for do_compact_page() */
	NEEDS_COMPACTING	= 2,
	/* page has been released and sits on (or heads for) the stale list */
	PAGE_STALE		= 3,
	/* by either reclaim or free */
	PAGE_CLAIMED		= 4,
};
1519a001fc1SVitaly Wool 
1529a001fc1SVitaly Wool /*****************
1539a001fc1SVitaly Wool  * Helpers
1549a001fc1SVitaly Wool *****************/
1559a001fc1SVitaly Wool 
1569a001fc1SVitaly Wool /* Converts an allocation size in bytes to size in z3fold chunks */
1579a001fc1SVitaly Wool static int size_to_chunks(size_t size)
1589a001fc1SVitaly Wool {
1599a001fc1SVitaly Wool 	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
1609a001fc1SVitaly Wool }
1619a001fc1SVitaly Wool 
/*
 * Iterate _iter over the unbuddied list indices from _begin up to, but not
 * including, NCHUNKS. _iter must be an assignable integer lvalue.
 */
#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)

/* Defined below; declared here because init_z3fold_page() installs it. */
static void compact_page_work(struct work_struct *w);
166d30561c5SVitaly Wool 
1679a001fc1SVitaly Wool /* Initializes the z3fold header of a newly allocated z3fold page */
168d30561c5SVitaly Wool static struct z3fold_header *init_z3fold_page(struct page *page,
169d30561c5SVitaly Wool 					struct z3fold_pool *pool)
1709a001fc1SVitaly Wool {
1719a001fc1SVitaly Wool 	struct z3fold_header *zhdr = page_address(page);
1729a001fc1SVitaly Wool 
1739a001fc1SVitaly Wool 	INIT_LIST_HEAD(&page->lru);
1749a001fc1SVitaly Wool 	clear_bit(PAGE_HEADLESS, &page->private);
1759a001fc1SVitaly Wool 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
176d30561c5SVitaly Wool 	clear_bit(NEEDS_COMPACTING, &page->private);
177d30561c5SVitaly Wool 	clear_bit(PAGE_STALE, &page->private);
178ca0246bbSVitaly Wool 	clear_bit(PAGE_CLAIMED, &page->private);
1799a001fc1SVitaly Wool 
1802f1e5e4dSVitaly Wool 	spin_lock_init(&zhdr->page_lock);
1815a27aa82SVitaly Wool 	kref_init(&zhdr->refcount);
1829a001fc1SVitaly Wool 	zhdr->first_chunks = 0;
1839a001fc1SVitaly Wool 	zhdr->middle_chunks = 0;
1849a001fc1SVitaly Wool 	zhdr->last_chunks = 0;
1859a001fc1SVitaly Wool 	zhdr->first_num = 0;
1869a001fc1SVitaly Wool 	zhdr->start_middle = 0;
187d30561c5SVitaly Wool 	zhdr->cpu = -1;
188d30561c5SVitaly Wool 	zhdr->pool = pool;
1899a001fc1SVitaly Wool 	INIT_LIST_HEAD(&zhdr->buddy);
190d30561c5SVitaly Wool 	INIT_WORK(&zhdr->work, compact_page_work);
1919a001fc1SVitaly Wool 	return zhdr;
1929a001fc1SVitaly Wool }
1939a001fc1SVitaly Wool 
/*
 * Returns a z3fold page to the page allocator. No struct page fields are
 * reset here; the page is handed straight to __free_page().
 */
static void free_z3fold_page(struct page *page)
{
	__free_page(page);
}
1995a27aa82SVitaly Wool 
/* Lock a z3fold page: takes the per-page spinlock in its header. */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}
2052f1e5e4dSVitaly Wool 
/*
 * Try to lock a z3fold page without spinning.
 * Returns the result of spin_trylock(): non-zero if the lock was taken.
 */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}
21176e32a2aSVitaly Wool 
/* Unlock a z3fold page: releases the per-page spinlock in its header. */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}
2172f1e5e4dSVitaly Wool 
2189a001fc1SVitaly Wool /*
2199a001fc1SVitaly Wool  * Encodes the handle of a particular buddy within a z3fold page
2209a001fc1SVitaly Wool  * Pool lock should be held as this function accesses first_num
2219a001fc1SVitaly Wool  */
2229a001fc1SVitaly Wool static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
2239a001fc1SVitaly Wool {
2249a001fc1SVitaly Wool 	unsigned long handle;
2259a001fc1SVitaly Wool 
2269a001fc1SVitaly Wool 	handle = (unsigned long)zhdr;
227ca0246bbSVitaly Wool 	if (bud != HEADLESS) {
228ca0246bbSVitaly Wool 		handle |= (bud + zhdr->first_num) & BUDDY_MASK;
229ca0246bbSVitaly Wool 		if (bud == LAST)
230ca0246bbSVitaly Wool 			handle |= (zhdr->last_chunks << BUDDY_SHIFT);
231ca0246bbSVitaly Wool 	}
2329a001fc1SVitaly Wool 	return handle;
2339a001fc1SVitaly Wool }
2349a001fc1SVitaly Wool 
2359a001fc1SVitaly Wool /* Returns the z3fold page where a given handle is stored */
2369a001fc1SVitaly Wool static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
2379a001fc1SVitaly Wool {
2389a001fc1SVitaly Wool 	return (struct z3fold_header *)(handle & PAGE_MASK);
2399a001fc1SVitaly Wool }
2409a001fc1SVitaly Wool 
241ca0246bbSVitaly Wool /* only for LAST bud, returns zero otherwise */
242ca0246bbSVitaly Wool static unsigned short handle_to_chunks(unsigned long handle)
243ca0246bbSVitaly Wool {
244ca0246bbSVitaly Wool 	return (handle & ~PAGE_MASK) >> BUDDY_SHIFT;
245ca0246bbSVitaly Wool }
246ca0246bbSVitaly Wool 
247f201ebd8Szhong jiang /*
248f201ebd8Szhong jiang  * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
249f201ebd8Szhong jiang  *  but that doesn't matter. because the masking will result in the
250f201ebd8Szhong jiang  *  correct buddy number.
251f201ebd8Szhong jiang  */
2529a001fc1SVitaly Wool static enum buddy handle_to_buddy(unsigned long handle)
2539a001fc1SVitaly Wool {
2549a001fc1SVitaly Wool 	struct z3fold_header *zhdr = handle_to_z3fold_header(handle);
2559a001fc1SVitaly Wool 	return (handle - zhdr->first_num) & BUDDY_MASK;
2569a001fc1SVitaly Wool }
2579a001fc1SVitaly Wool 
/* Returns the pool that owns the z3fold page described by @zhdr. */
static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}
2629050cce1SVitaly Wool 
/*
 * Common tail of the kref release callbacks: marks the page stale and hands
 * it to the release workqueue, which frees it in free_pages_work().
 *
 * @zhdr:	header of the page being released; must already be off any
 *		unbuddied list (a WARN fires otherwise)
 * @locked:	true if the caller holds the page lock; it is dropped here
 */
static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	/* a stale page must not be picked up for compaction any more */
	clear_bit(NEEDS_COMPACTING, &page->private);
	/* take the page off the pool LRU, if it is on it */
	spin_lock(&pool->lock);
	if (!list_empty(&page->lru))
		list_del(&page->lru);
	spin_unlock(&pool->lock);
	if (locked)
		z3fold_page_unlock(zhdr);
	/* reuse the (now idle) buddy link to chain the page on the stale list */
	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);
}
282d30561c5SVitaly Wool 
283d30561c5SVitaly Wool static void __attribute__((__unused__))
284d30561c5SVitaly Wool 			release_z3fold_page(struct kref *ref)
285d30561c5SVitaly Wool {
286d30561c5SVitaly Wool 	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
287d30561c5SVitaly Wool 						refcount);
288d30561c5SVitaly Wool 	__release_z3fold_page(zhdr, false);
289d30561c5SVitaly Wool }
290d30561c5SVitaly Wool 
291d30561c5SVitaly Wool static void release_z3fold_page_locked(struct kref *ref)
292d30561c5SVitaly Wool {
293d30561c5SVitaly Wool 	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
294d30561c5SVitaly Wool 						refcount);
295d30561c5SVitaly Wool 	WARN_ON(z3fold_page_trylock(zhdr));
296d30561c5SVitaly Wool 	__release_z3fold_page(zhdr, true);
297d30561c5SVitaly Wool }
298d30561c5SVitaly Wool 
299d30561c5SVitaly Wool static void release_z3fold_page_locked_list(struct kref *ref)
300d30561c5SVitaly Wool {
301d30561c5SVitaly Wool 	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
302d30561c5SVitaly Wool 					       refcount);
3039050cce1SVitaly Wool 	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
3049050cce1SVitaly Wool 	spin_lock(&pool->lock);
305d30561c5SVitaly Wool 	list_del_init(&zhdr->buddy);
3069050cce1SVitaly Wool 	spin_unlock(&pool->lock);
307d30561c5SVitaly Wool 
308d30561c5SVitaly Wool 	WARN_ON(z3fold_page_trylock(zhdr));
309d30561c5SVitaly Wool 	__release_z3fold_page(zhdr, true);
310d30561c5SVitaly Wool }
311d30561c5SVitaly Wool 
312d30561c5SVitaly Wool static void free_pages_work(struct work_struct *w)
313d30561c5SVitaly Wool {
314d30561c5SVitaly Wool 	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);
315d30561c5SVitaly Wool 
316d30561c5SVitaly Wool 	spin_lock(&pool->stale_lock);
317d30561c5SVitaly Wool 	while (!list_empty(&pool->stale)) {
318d30561c5SVitaly Wool 		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
319d30561c5SVitaly Wool 						struct z3fold_header, buddy);
320d30561c5SVitaly Wool 		struct page *page = virt_to_page(zhdr);
321d30561c5SVitaly Wool 
322d30561c5SVitaly Wool 		list_del(&zhdr->buddy);
323d30561c5SVitaly Wool 		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
324d30561c5SVitaly Wool 			continue;
325d30561c5SVitaly Wool 		spin_unlock(&pool->stale_lock);
326d30561c5SVitaly Wool 		cancel_work_sync(&zhdr->work);
327d30561c5SVitaly Wool 		free_z3fold_page(page);
328d30561c5SVitaly Wool 		cond_resched();
329d30561c5SVitaly Wool 		spin_lock(&pool->stale_lock);
330d30561c5SVitaly Wool 	}
331d30561c5SVitaly Wool 	spin_unlock(&pool->stale_lock);
332d30561c5SVitaly Wool }
333d30561c5SVitaly Wool 
3349a001fc1SVitaly Wool /*
3359a001fc1SVitaly Wool  * Returns the number of free chunks in a z3fold page.
3369a001fc1SVitaly Wool  * NB: can't be used with HEADLESS pages.
3379a001fc1SVitaly Wool  */
3389a001fc1SVitaly Wool static int num_free_chunks(struct z3fold_header *zhdr)
3399a001fc1SVitaly Wool {
3409a001fc1SVitaly Wool 	int nfree;
3419a001fc1SVitaly Wool 	/*
3429a001fc1SVitaly Wool 	 * If there is a middle object, pick up the bigger free space
3439a001fc1SVitaly Wool 	 * either before or after it. Otherwise just subtract the number
3449a001fc1SVitaly Wool 	 * of chunks occupied by the first and the last objects.
3459a001fc1SVitaly Wool 	 */
3469a001fc1SVitaly Wool 	if (zhdr->middle_chunks != 0) {
3479a001fc1SVitaly Wool 		int nfree_before = zhdr->first_chunks ?
348ede93213SVitaly Wool 			0 : zhdr->start_middle - ZHDR_CHUNKS;
3499a001fc1SVitaly Wool 		int nfree_after = zhdr->last_chunks ?
350ede93213SVitaly Wool 			0 : TOTAL_CHUNKS -
351ede93213SVitaly Wool 				(zhdr->start_middle + zhdr->middle_chunks);
3529a001fc1SVitaly Wool 		nfree = max(nfree_before, nfree_after);
3539a001fc1SVitaly Wool 	} else
3549a001fc1SVitaly Wool 		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
3559a001fc1SVitaly Wool 	return nfree;
3569a001fc1SVitaly Wool }
3579a001fc1SVitaly Wool 
3589050cce1SVitaly Wool /* Add to the appropriate unbuddied list */
3599050cce1SVitaly Wool static inline void add_to_unbuddied(struct z3fold_pool *pool,
3609050cce1SVitaly Wool 				struct z3fold_header *zhdr)
3619050cce1SVitaly Wool {
3629050cce1SVitaly Wool 	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
3639050cce1SVitaly Wool 			zhdr->middle_chunks == 0) {
3649050cce1SVitaly Wool 		struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);
3659050cce1SVitaly Wool 
3669050cce1SVitaly Wool 		int freechunks = num_free_chunks(zhdr);
3679050cce1SVitaly Wool 		spin_lock(&pool->lock);
3689050cce1SVitaly Wool 		list_add(&zhdr->buddy, &unbuddied[freechunks]);
3699050cce1SVitaly Wool 		spin_unlock(&pool->lock);
3709050cce1SVitaly Wool 		zhdr->cpu = smp_processor_id();
3719050cce1SVitaly Wool 		put_cpu_ptr(pool->unbuddied);
3729050cce1SVitaly Wool 	}
3739050cce1SVitaly Wool }
3749050cce1SVitaly Wool 
375ede93213SVitaly Wool static inline void *mchunk_memmove(struct z3fold_header *zhdr,
376ede93213SVitaly Wool 				unsigned short dst_chunk)
377ede93213SVitaly Wool {
378ede93213SVitaly Wool 	void *beg = zhdr;
379ede93213SVitaly Wool 	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
380ede93213SVitaly Wool 		       beg + (zhdr->start_middle << CHUNK_SHIFT),
381ede93213SVitaly Wool 		       zhdr->middle_chunks << CHUNK_SHIFT);
382ede93213SVitaly Wool }
383ede93213SVitaly Wool 
3841b096e5aSVitaly Wool #define BIG_CHUNK_GAP	3
3859a001fc1SVitaly Wool /* Has to be called with lock held */
3869a001fc1SVitaly Wool static int z3fold_compact_page(struct z3fold_header *zhdr)
3879a001fc1SVitaly Wool {
3889a001fc1SVitaly Wool 	struct page *page = virt_to_page(zhdr);
3899a001fc1SVitaly Wool 
390ede93213SVitaly Wool 	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
391ede93213SVitaly Wool 		return 0; /* can't move middle chunk, it's used */
3929a001fc1SVitaly Wool 
393ede93213SVitaly Wool 	if (zhdr->middle_chunks == 0)
394ede93213SVitaly Wool 		return 0; /* nothing to compact */
395ede93213SVitaly Wool 
396ede93213SVitaly Wool 	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
397ede93213SVitaly Wool 		/* move to the beginning */
398ede93213SVitaly Wool 		mchunk_memmove(zhdr, ZHDR_CHUNKS);
3999a001fc1SVitaly Wool 		zhdr->first_chunks = zhdr->middle_chunks;
4009a001fc1SVitaly Wool 		zhdr->middle_chunks = 0;
4019a001fc1SVitaly Wool 		zhdr->start_middle = 0;
4029a001fc1SVitaly Wool 		zhdr->first_num++;
403ede93213SVitaly Wool 		return 1;
4049a001fc1SVitaly Wool 	}
4059a001fc1SVitaly Wool 
4061b096e5aSVitaly Wool 	/*
4071b096e5aSVitaly Wool 	 * moving data is expensive, so let's only do that if
4081b096e5aSVitaly Wool 	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
4091b096e5aSVitaly Wool 	 */
4101b096e5aSVitaly Wool 	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
4111b096e5aSVitaly Wool 	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
4121b096e5aSVitaly Wool 			BIG_CHUNK_GAP) {
4131b096e5aSVitaly Wool 		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
4141b096e5aSVitaly Wool 		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
4151b096e5aSVitaly Wool 		return 1;
4161b096e5aSVitaly Wool 	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
4171b096e5aSVitaly Wool 		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
4181b096e5aSVitaly Wool 					+ zhdr->middle_chunks) >=
4191b096e5aSVitaly Wool 			BIG_CHUNK_GAP) {
4201b096e5aSVitaly Wool 		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
4211b096e5aSVitaly Wool 			zhdr->middle_chunks;
4221b096e5aSVitaly Wool 		mchunk_memmove(zhdr, new_start);
4231b096e5aSVitaly Wool 		zhdr->start_middle = new_start;
4241b096e5aSVitaly Wool 		return 1;
4251b096e5aSVitaly Wool 	}
4261b096e5aSVitaly Wool 
4271b096e5aSVitaly Wool 	return 0;
4281b096e5aSVitaly Wool }
4291b096e5aSVitaly Wool 
/*
 * Compacts a page that was flagged NEEDS_COMPACTING and puts it back on the
 * appropriate unbuddied list.
 *
 * @zhdr:	header of the page to compact
 * @locked:	true if the caller already holds the page lock; otherwise it
 *		is taken here
 *
 * The page lock is released on every return path: directly here, or by the
 * kref release callback when the last reference is dropped.
 */
static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		/* caller claims to hold the lock: a successful trylock is a bug */
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	/* the flag must be set when we get here; warn and bail out if not */
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	/* take the page off its unbuddied list while its layout may change */
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	/*
	 * Drop one reference. If it was the last one, the release callback
	 * has already unlocked the page and queued it for freeing, so only
	 * the page count is left to adjust.
	 * NOTE(review): presumably this is the reference taken when the
	 * compaction was scheduled - confirm against the scheduling site.
	 */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	z3fold_page_unlock(zhdr);
}
457d30561c5SVitaly Wool 
458d30561c5SVitaly Wool static void compact_page_work(struct work_struct *w)
459d30561c5SVitaly Wool {
460d30561c5SVitaly Wool 	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
461d30561c5SVitaly Wool 						work);
462d30561c5SVitaly Wool 
463d30561c5SVitaly Wool 	do_compact_page(zhdr, false);
464d30561c5SVitaly Wool }
465d30561c5SVitaly Wool 
/*
 * Looks for an existing, partially filled z3fold page that can take an
 * object of @size bytes.
 *
 * @pool:	pool to search
 * @size:	requested allocation size in bytes
 * @can_sleep:	if true, cond_resched() is called before retrying
 *
 * The current CPU's unbuddied lists are searched first, starting at the
 * list whose index equals the needed number of chunks and moving towards
 * larger free regions. If that yields nothing, the exact-size list of every
 * online CPU is tried.
 *
 * returns _locked_ z3fold page header or NULL. On success the page has been
 * removed from its unbuddied list and an extra reference has been taken.
 */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = get_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		/* lockless peek; validated under pool->lock below */
		zhdr = list_first_entry_or_null(READ_ONCE(l),
					struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		l = &unbuddied[i];
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
						struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			/* list changed or page is busy: restart the search */
			spin_unlock(&pool->lock);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		if (test_bit(NEEDS_COMPACTING, &page->private)) {
			/*
			 * Compaction is pending for this page; leave it to
			 * do_compact_page(), which re-adds it to a list, and
			 * restart the search.
			 */
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	put_cpu_ptr(pool->unbuddied);

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
						struct z3fold_header, buddy);

			/* nothing there, or page busy: move on to the next CPU */
			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private)) {
				/* pending compaction owns this page; skip it */
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	return zhdr;
}
563d30561c5SVitaly Wool 
564d30561c5SVitaly Wool /*
565d30561c5SVitaly Wool  * API Functions
566d30561c5SVitaly Wool  */
567d30561c5SVitaly Wool 
568d30561c5SVitaly Wool /**
569d30561c5SVitaly Wool  * z3fold_create_pool() - create a new z3fold pool
570d30561c5SVitaly Wool  * @name:	pool name
571d30561c5SVitaly Wool  * @gfp:	gfp flags when allocating the z3fold pool structure
572d30561c5SVitaly Wool  * @ops:	user-defined operations for the z3fold pool
573d30561c5SVitaly Wool  *
574d30561c5SVitaly Wool  * Return: pointer to the new z3fold pool or NULL if the metadata allocation
575d30561c5SVitaly Wool  * failed.
576d30561c5SVitaly Wool  */
577d30561c5SVitaly Wool static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
578d30561c5SVitaly Wool 		const struct z3fold_ops *ops)
579d30561c5SVitaly Wool {
580d30561c5SVitaly Wool 	struct z3fold_pool *pool = NULL;
581d30561c5SVitaly Wool 	int i, cpu;
582d30561c5SVitaly Wool 
583d30561c5SVitaly Wool 	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
584d30561c5SVitaly Wool 	if (!pool)
585d30561c5SVitaly Wool 		goto out;
586d30561c5SVitaly Wool 	spin_lock_init(&pool->lock);
587d30561c5SVitaly Wool 	spin_lock_init(&pool->stale_lock);
588d30561c5SVitaly Wool 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
5891ec6995dSXidong Wang 	if (!pool->unbuddied)
5901ec6995dSXidong Wang 		goto out_pool;
591d30561c5SVitaly Wool 	for_each_possible_cpu(cpu) {
592d30561c5SVitaly Wool 		struct list_head *unbuddied =
593d30561c5SVitaly Wool 				per_cpu_ptr(pool->unbuddied, cpu);
594d30561c5SVitaly Wool 		for_each_unbuddied_list(i, 0)
595d30561c5SVitaly Wool 			INIT_LIST_HEAD(&unbuddied[i]);
596d30561c5SVitaly Wool 	}
597d30561c5SVitaly Wool 	INIT_LIST_HEAD(&pool->lru);
598d30561c5SVitaly Wool 	INIT_LIST_HEAD(&pool->stale);
599d30561c5SVitaly Wool 	atomic64_set(&pool->pages_nr, 0);
600d30561c5SVitaly Wool 	pool->name = name;
601d30561c5SVitaly Wool 	pool->compact_wq = create_singlethread_workqueue(pool->name);
602d30561c5SVitaly Wool 	if (!pool->compact_wq)
6031ec6995dSXidong Wang 		goto out_unbuddied;
604d30561c5SVitaly Wool 	pool->release_wq = create_singlethread_workqueue(pool->name);
605d30561c5SVitaly Wool 	if (!pool->release_wq)
606d30561c5SVitaly Wool 		goto out_wq;
607d30561c5SVitaly Wool 	INIT_WORK(&pool->work, free_pages_work);
608d30561c5SVitaly Wool 	pool->ops = ops;
609d30561c5SVitaly Wool 	return pool;
610d30561c5SVitaly Wool 
611d30561c5SVitaly Wool out_wq:
612d30561c5SVitaly Wool 	destroy_workqueue(pool->compact_wq);
6131ec6995dSXidong Wang out_unbuddied:
6141ec6995dSXidong Wang 	free_percpu(pool->unbuddied);
6151ec6995dSXidong Wang out_pool:
616d30561c5SVitaly Wool 	kfree(pool);
6171ec6995dSXidong Wang out:
618d30561c5SVitaly Wool 	return NULL;
619d30561c5SVitaly Wool }
620d30561c5SVitaly Wool 
621d30561c5SVitaly Wool /**
622d30561c5SVitaly Wool  * z3fold_destroy_pool() - destroys an existing z3fold pool
623d30561c5SVitaly Wool  * @pool:	the z3fold pool to be destroyed
624d30561c5SVitaly Wool  *
625d30561c5SVitaly Wool  * The pool should be emptied before this function is called.
626d30561c5SVitaly Wool  */
627d30561c5SVitaly Wool static void z3fold_destroy_pool(struct z3fold_pool *pool)
628d30561c5SVitaly Wool {
629d30561c5SVitaly Wool 	destroy_workqueue(pool->release_wq);
630d30561c5SVitaly Wool 	destroy_workqueue(pool->compact_wq);
631d30561c5SVitaly Wool 	kfree(pool);
632d30561c5SVitaly Wool }
633d30561c5SVitaly Wool 
6349a001fc1SVitaly Wool /**
6359a001fc1SVitaly Wool  * z3fold_alloc() - allocates a region of a given size
6369a001fc1SVitaly Wool  * @pool:	z3fold pool from which to allocate
6379a001fc1SVitaly Wool  * @size:	size in bytes of the desired allocation
6389a001fc1SVitaly Wool  * @gfp:	gfp flags used if the pool needs to grow
6399a001fc1SVitaly Wool  * @handle:	handle of the new allocation
6409a001fc1SVitaly Wool  *
6419a001fc1SVitaly Wool  * This function will attempt to find a free region in the pool large enough to
6429a001fc1SVitaly Wool  * satisfy the allocation request.  A search of the unbuddied lists is
6439a001fc1SVitaly Wool  * performed first. If no suitable free region is found, then a new page is
6449a001fc1SVitaly Wool  * allocated and added to the pool to satisfy the request.
6459a001fc1SVitaly Wool  *
6469a001fc1SVitaly Wool  * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
6479a001fc1SVitaly Wool  * as z3fold pool pages.
6489a001fc1SVitaly Wool  *
6499a001fc1SVitaly Wool  * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
6509a001fc1SVitaly Wool  * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
6519a001fc1SVitaly Wool  * a new page.
6529a001fc1SVitaly Wool  */
6539a001fc1SVitaly Wool static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
6549a001fc1SVitaly Wool 			unsigned long *handle)
6559a001fc1SVitaly Wool {
6569050cce1SVitaly Wool 	int chunks = size_to_chunks(size);
6579a001fc1SVitaly Wool 	struct z3fold_header *zhdr = NULL;
658d30561c5SVitaly Wool 	struct page *page = NULL;
6599a001fc1SVitaly Wool 	enum buddy bud;
6608a97ea54SMatthew Wilcox 	bool can_sleep = gfpflags_allow_blocking(gfp);
6619a001fc1SVitaly Wool 
6629a001fc1SVitaly Wool 	if (!size || (gfp & __GFP_HIGHMEM))
6639a001fc1SVitaly Wool 		return -EINVAL;
6649a001fc1SVitaly Wool 
6659a001fc1SVitaly Wool 	if (size > PAGE_SIZE)
6669a001fc1SVitaly Wool 		return -ENOSPC;
6679a001fc1SVitaly Wool 
6689a001fc1SVitaly Wool 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
6699a001fc1SVitaly Wool 		bud = HEADLESS;
6709a001fc1SVitaly Wool 	else {
6719050cce1SVitaly Wool retry:
6729050cce1SVitaly Wool 		zhdr = __z3fold_alloc(pool, size, can_sleep);
673d30561c5SVitaly Wool 		if (zhdr) {
6749a001fc1SVitaly Wool 			if (zhdr->first_chunks == 0) {
6759a001fc1SVitaly Wool 				if (zhdr->middle_chunks != 0 &&
6769a001fc1SVitaly Wool 				    chunks >= zhdr->start_middle)
6779a001fc1SVitaly Wool 					bud = LAST;
6789a001fc1SVitaly Wool 				else
6799a001fc1SVitaly Wool 					bud = FIRST;
6809a001fc1SVitaly Wool 			} else if (zhdr->last_chunks == 0)
6819a001fc1SVitaly Wool 				bud = LAST;
6829a001fc1SVitaly Wool 			else if (zhdr->middle_chunks == 0)
6839a001fc1SVitaly Wool 				bud = MIDDLE;
6849a001fc1SVitaly Wool 			else {
6855a27aa82SVitaly Wool 				if (kref_put(&zhdr->refcount,
686d30561c5SVitaly Wool 					     release_z3fold_page_locked))
6875a27aa82SVitaly Wool 					atomic64_dec(&pool->pages_nr);
688d30561c5SVitaly Wool 				else
689d30561c5SVitaly Wool 					z3fold_page_unlock(zhdr);
6909a001fc1SVitaly Wool 				pr_err("No free chunks in unbuddied\n");
6919a001fc1SVitaly Wool 				WARN_ON(1);
6929050cce1SVitaly Wool 				goto retry;
6939a001fc1SVitaly Wool 			}
6949050cce1SVitaly Wool 			page = virt_to_page(zhdr);
6959a001fc1SVitaly Wool 			goto found;
6969a001fc1SVitaly Wool 		}
6979a001fc1SVitaly Wool 		bud = FIRST;
6989a001fc1SVitaly Wool 	}
6999a001fc1SVitaly Wool 
7005c9bab59SVitaly Wool 	page = NULL;
7015c9bab59SVitaly Wool 	if (can_sleep) {
702d30561c5SVitaly Wool 		spin_lock(&pool->stale_lock);
703d30561c5SVitaly Wool 		zhdr = list_first_entry_or_null(&pool->stale,
704d30561c5SVitaly Wool 						struct z3fold_header, buddy);
705d30561c5SVitaly Wool 		/*
7065c9bab59SVitaly Wool 		 * Before allocating a page, let's see if we can take one from
7075c9bab59SVitaly Wool 		 * the stale pages list. cancel_work_sync() can sleep so we
7085c9bab59SVitaly Wool 		 * limit this case to the contexts where we can sleep
709d30561c5SVitaly Wool 		 */
7105c9bab59SVitaly Wool 		if (zhdr) {
711d30561c5SVitaly Wool 			list_del(&zhdr->buddy);
712d30561c5SVitaly Wool 			spin_unlock(&pool->stale_lock);
713d30561c5SVitaly Wool 			cancel_work_sync(&zhdr->work);
714d30561c5SVitaly Wool 			page = virt_to_page(zhdr);
715d30561c5SVitaly Wool 		} else {
716d30561c5SVitaly Wool 			spin_unlock(&pool->stale_lock);
717d30561c5SVitaly Wool 		}
7185c9bab59SVitaly Wool 	}
7195c9bab59SVitaly Wool 	if (!page)
7205c9bab59SVitaly Wool 		page = alloc_page(gfp);
721d30561c5SVitaly Wool 
7229a001fc1SVitaly Wool 	if (!page)
7239a001fc1SVitaly Wool 		return -ENOMEM;
7242f1e5e4dSVitaly Wool 
725d30561c5SVitaly Wool 	zhdr = init_z3fold_page(page, pool);
7269050cce1SVitaly Wool 	if (!zhdr) {
7279050cce1SVitaly Wool 		__free_page(page);
7289050cce1SVitaly Wool 		return -ENOMEM;
7299050cce1SVitaly Wool 	}
7309050cce1SVitaly Wool 	atomic64_inc(&pool->pages_nr);
7319a001fc1SVitaly Wool 
7329a001fc1SVitaly Wool 	if (bud == HEADLESS) {
7339a001fc1SVitaly Wool 		set_bit(PAGE_HEADLESS, &page->private);
7349a001fc1SVitaly Wool 		goto headless;
7359a001fc1SVitaly Wool 	}
7362f1e5e4dSVitaly Wool 	z3fold_page_lock(zhdr);
7379a001fc1SVitaly Wool 
7389a001fc1SVitaly Wool found:
7399a001fc1SVitaly Wool 	if (bud == FIRST)
7409a001fc1SVitaly Wool 		zhdr->first_chunks = chunks;
7419a001fc1SVitaly Wool 	else if (bud == LAST)
7429a001fc1SVitaly Wool 		zhdr->last_chunks = chunks;
7439a001fc1SVitaly Wool 	else {
7449a001fc1SVitaly Wool 		zhdr->middle_chunks = chunks;
745ede93213SVitaly Wool 		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
7469a001fc1SVitaly Wool 	}
7479050cce1SVitaly Wool 	add_to_unbuddied(pool, zhdr);
7489a001fc1SVitaly Wool 
7499a001fc1SVitaly Wool headless:
750d30561c5SVitaly Wool 	spin_lock(&pool->lock);
7519a001fc1SVitaly Wool 	/* Add/move z3fold page to beginning of LRU */
7529a001fc1SVitaly Wool 	if (!list_empty(&page->lru))
7539a001fc1SVitaly Wool 		list_del(&page->lru);
7549a001fc1SVitaly Wool 
7559a001fc1SVitaly Wool 	list_add(&page->lru, &pool->lru);
7569a001fc1SVitaly Wool 
7579a001fc1SVitaly Wool 	*handle = encode_handle(zhdr, bud);
7589a001fc1SVitaly Wool 	spin_unlock(&pool->lock);
7592f1e5e4dSVitaly Wool 	if (bud != HEADLESS)
7602f1e5e4dSVitaly Wool 		z3fold_page_unlock(zhdr);
7619a001fc1SVitaly Wool 
7629a001fc1SVitaly Wool 	return 0;
7639a001fc1SVitaly Wool }
7649a001fc1SVitaly Wool 
7659a001fc1SVitaly Wool /**
7669a001fc1SVitaly Wool  * z3fold_free() - frees the allocation associated with the given handle
7679a001fc1SVitaly Wool  * @pool:	pool in which the allocation resided
7689a001fc1SVitaly Wool  * @handle:	handle associated with the allocation returned by z3fold_alloc()
7699a001fc1SVitaly Wool  *
7709a001fc1SVitaly Wool  * In the case that the z3fold page in which the allocation resides is under
7719a001fc1SVitaly Wool  * reclaim, as indicated by the PG_reclaim flag being set, this function
7729a001fc1SVitaly Wool  * only sets the first|last_chunks to 0.  The page is actually freed
7739a001fc1SVitaly Wool  * once both buddies are evicted (see z3fold_reclaim_page() below).
7749a001fc1SVitaly Wool  */
7759a001fc1SVitaly Wool static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
7769a001fc1SVitaly Wool {
7779a001fc1SVitaly Wool 	struct z3fold_header *zhdr;
7789a001fc1SVitaly Wool 	struct page *page;
7799a001fc1SVitaly Wool 	enum buddy bud;
7809a001fc1SVitaly Wool 
7819a001fc1SVitaly Wool 	zhdr = handle_to_z3fold_header(handle);
7829a001fc1SVitaly Wool 	page = virt_to_page(zhdr);
7839a001fc1SVitaly Wool 
7849a001fc1SVitaly Wool 	if (test_bit(PAGE_HEADLESS, &page->private)) {
785ca0246bbSVitaly Wool 		/* if a headless page is under reclaim, just leave.
786ca0246bbSVitaly Wool 		 * NB: we use test_and_set_bit for a reason: if the bit
787ca0246bbSVitaly Wool 		 * has not been set before, we release this page
788ca0246bbSVitaly Wool 		 * immediately so we don't care about its value any more.
789ca0246bbSVitaly Wool 		 */
790ca0246bbSVitaly Wool 		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
791ca0246bbSVitaly Wool 			spin_lock(&pool->lock);
792ca0246bbSVitaly Wool 			list_del(&page->lru);
793ca0246bbSVitaly Wool 			spin_unlock(&pool->lock);
794ca0246bbSVitaly Wool 			free_z3fold_page(page);
795ca0246bbSVitaly Wool 			atomic64_dec(&pool->pages_nr);
796ca0246bbSVitaly Wool 		}
797ca0246bbSVitaly Wool 		return;
798ca0246bbSVitaly Wool 	}
799ca0246bbSVitaly Wool 
800ca0246bbSVitaly Wool 	/* Non-headless case */
8012f1e5e4dSVitaly Wool 	z3fold_page_lock(zhdr);
80243afc194SVitaly Wool 	bud = handle_to_buddy(handle);
8039a001fc1SVitaly Wool 
8049a001fc1SVitaly Wool 	switch (bud) {
8059a001fc1SVitaly Wool 	case FIRST:
8069a001fc1SVitaly Wool 		zhdr->first_chunks = 0;
8079a001fc1SVitaly Wool 		break;
8089a001fc1SVitaly Wool 	case MIDDLE:
8099a001fc1SVitaly Wool 		zhdr->middle_chunks = 0;
8109a001fc1SVitaly Wool 		break;
8119a001fc1SVitaly Wool 	case LAST:
8129a001fc1SVitaly Wool 		zhdr->last_chunks = 0;
8139a001fc1SVitaly Wool 		break;
8149a001fc1SVitaly Wool 	default:
8159a001fc1SVitaly Wool 		pr_err("%s: unknown bud %d\n", __func__, bud);
8169a001fc1SVitaly Wool 		WARN_ON(1);
8172f1e5e4dSVitaly Wool 		z3fold_page_unlock(zhdr);
8189a001fc1SVitaly Wool 		return;
8199a001fc1SVitaly Wool 	}
8209a001fc1SVitaly Wool 
821d30561c5SVitaly Wool 	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
822d30561c5SVitaly Wool 		atomic64_dec(&pool->pages_nr);
823d30561c5SVitaly Wool 		return;
824d30561c5SVitaly Wool 	}
825ca0246bbSVitaly Wool 	if (test_bit(PAGE_CLAIMED, &page->private)) {
8266098d7e1SVitaly Wool 		z3fold_page_unlock(zhdr);
8276098d7e1SVitaly Wool 		return;
8286098d7e1SVitaly Wool 	}
829d30561c5SVitaly Wool 	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
830d30561c5SVitaly Wool 		z3fold_page_unlock(zhdr);
831d30561c5SVitaly Wool 		return;
832d30561c5SVitaly Wool 	}
833d30561c5SVitaly Wool 	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
834d30561c5SVitaly Wool 		spin_lock(&pool->lock);
835d30561c5SVitaly Wool 		list_del_init(&zhdr->buddy);
836d30561c5SVitaly Wool 		spin_unlock(&pool->lock);
837d30561c5SVitaly Wool 		zhdr->cpu = -1;
8385d03a661SVitaly Wool 		kref_get(&zhdr->refcount);
839d30561c5SVitaly Wool 		do_compact_page(zhdr, true);
840d30561c5SVitaly Wool 		return;
841d30561c5SVitaly Wool 	}
8425d03a661SVitaly Wool 	kref_get(&zhdr->refcount);
843d30561c5SVitaly Wool 	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
844d30561c5SVitaly Wool 	z3fold_page_unlock(zhdr);
8459a001fc1SVitaly Wool }
8469a001fc1SVitaly Wool 
8479a001fc1SVitaly Wool /**
8489a001fc1SVitaly Wool  * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
8499a001fc1SVitaly Wool  * @pool:	pool from which a page will attempt to be evicted
850f144c390SMike Rapoport  * @retries:	number of pages on the LRU list for which eviction will
8519a001fc1SVitaly Wool  *		be attempted before failing
8529a001fc1SVitaly Wool  *
8539a001fc1SVitaly Wool  * z3fold reclaim is different from normal system reclaim in that it is done
8549a001fc1SVitaly Wool  * from the bottom, up. This is because only the bottom layer, z3fold, has
8559a001fc1SVitaly Wool  * information on how the allocations are organized within each z3fold page.
8569a001fc1SVitaly Wool  * This has the potential to create interesting locking situations between
8579a001fc1SVitaly Wool  * z3fold and the user, however.
8589a001fc1SVitaly Wool  *
8599a001fc1SVitaly Wool  * To avoid these, this is how z3fold_reclaim_page() should be called:
860f144c390SMike Rapoport  *
8619a001fc1SVitaly Wool  * The user detects a page should be reclaimed and calls z3fold_reclaim_page().
8629a001fc1SVitaly Wool  * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and
8639a001fc1SVitaly Wool  * call the user-defined eviction handler with the pool and handle as
8649a001fc1SVitaly Wool  * arguments.
8659a001fc1SVitaly Wool  *
8669a001fc1SVitaly Wool  * If the handle can not be evicted, the eviction handler should return
8679a001fc1SVitaly Wool  * non-zero. z3fold_reclaim_page() will add the z3fold page back to the
8689a001fc1SVitaly Wool  * appropriate list and try the next z3fold page on the LRU up to
8699a001fc1SVitaly Wool  * a user defined number of retries.
8709a001fc1SVitaly Wool  *
8719a001fc1SVitaly Wool  * If the handle is successfully evicted, the eviction handler should
8729a001fc1SVitaly Wool  * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free()
8739a001fc1SVitaly Wool  * contains logic to delay freeing the page if the page is under reclaim,
8749a001fc1SVitaly Wool  * as indicated by the setting of the PG_reclaim flag on the underlying page.
8759a001fc1SVitaly Wool  *
8769a001fc1SVitaly Wool  * If all buddies in the z3fold page are successfully evicted, then the
8779a001fc1SVitaly Wool  * z3fold page can be freed.
8789a001fc1SVitaly Wool  *
8799a001fc1SVitaly Wool  * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
8809a001fc1SVitaly Wool  * no pages to evict or an eviction handler is not registered, -EAGAIN if
8819a001fc1SVitaly Wool  * the retry limit was hit.
8829a001fc1SVitaly Wool  */
8839a001fc1SVitaly Wool static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
8849a001fc1SVitaly Wool {
885d30561c5SVitaly Wool 	int i, ret = 0;
886d30561c5SVitaly Wool 	struct z3fold_header *zhdr = NULL;
887d30561c5SVitaly Wool 	struct page *page = NULL;
888d30561c5SVitaly Wool 	struct list_head *pos;
8899a001fc1SVitaly Wool 	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
8909a001fc1SVitaly Wool 
8919a001fc1SVitaly Wool 	spin_lock(&pool->lock);
8922f1e5e4dSVitaly Wool 	if (!pool->ops || !pool->ops->evict || retries == 0) {
8939a001fc1SVitaly Wool 		spin_unlock(&pool->lock);
8949a001fc1SVitaly Wool 		return -EINVAL;
8959a001fc1SVitaly Wool 	}
8969a001fc1SVitaly Wool 	for (i = 0; i < retries; i++) {
8972f1e5e4dSVitaly Wool 		if (list_empty(&pool->lru)) {
8982f1e5e4dSVitaly Wool 			spin_unlock(&pool->lock);
8992f1e5e4dSVitaly Wool 			return -EINVAL;
9002f1e5e4dSVitaly Wool 		}
901d30561c5SVitaly Wool 		list_for_each_prev(pos, &pool->lru) {
902d30561c5SVitaly Wool 			page = list_entry(pos, struct page, lru);
903ca0246bbSVitaly Wool 
904ca0246bbSVitaly Wool 			/* this bit could have been set by free, in which case
905ca0246bbSVitaly Wool 			 * we pass over to the next page in the pool.
906ca0246bbSVitaly Wool 			 */
907ca0246bbSVitaly Wool 			if (test_and_set_bit(PAGE_CLAIMED, &page->private))
908ca0246bbSVitaly Wool 				continue;
9099a001fc1SVitaly Wool 
9109a001fc1SVitaly Wool 			zhdr = page_address(page);
911ca0246bbSVitaly Wool 			if (test_bit(PAGE_HEADLESS, &page->private))
912ca0246bbSVitaly Wool 				break;
913ca0246bbSVitaly Wool 
914ca0246bbSVitaly Wool 			if (!z3fold_page_trylock(zhdr)) {
915ca0246bbSVitaly Wool 				zhdr = NULL;
916d30561c5SVitaly Wool 				continue; /* can't evict at this point */
917ca0246bbSVitaly Wool 			}
9185a27aa82SVitaly Wool 			kref_get(&zhdr->refcount);
919d30561c5SVitaly Wool 			list_del_init(&zhdr->buddy);
920d30561c5SVitaly Wool 			zhdr->cpu = -1;
9216098d7e1SVitaly Wool 			break;
922d30561c5SVitaly Wool 		}
923d30561c5SVitaly Wool 
924ca0246bbSVitaly Wool 		if (!zhdr)
925ca0246bbSVitaly Wool 			break;
926ca0246bbSVitaly Wool 
927d30561c5SVitaly Wool 		list_del_init(&page->lru);
9282f1e5e4dSVitaly Wool 		spin_unlock(&pool->lock);
929d30561c5SVitaly Wool 
930d30561c5SVitaly Wool 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
9319a001fc1SVitaly Wool 			/*
9329a001fc1SVitaly Wool 			 * We need encode the handles before unlocking, since
9339a001fc1SVitaly Wool 			 * we can race with free that will set
9349a001fc1SVitaly Wool 			 * (first|last)_chunks to 0
9359a001fc1SVitaly Wool 			 */
9369a001fc1SVitaly Wool 			first_handle = 0;
9379a001fc1SVitaly Wool 			last_handle = 0;
9389a001fc1SVitaly Wool 			middle_handle = 0;
9399a001fc1SVitaly Wool 			if (zhdr->first_chunks)
9409a001fc1SVitaly Wool 				first_handle = encode_handle(zhdr, FIRST);
9419a001fc1SVitaly Wool 			if (zhdr->middle_chunks)
9429a001fc1SVitaly Wool 				middle_handle = encode_handle(zhdr, MIDDLE);
9439a001fc1SVitaly Wool 			if (zhdr->last_chunks)
9449a001fc1SVitaly Wool 				last_handle = encode_handle(zhdr, LAST);
945d30561c5SVitaly Wool 			/*
946d30561c5SVitaly Wool 			 * it's safe to unlock here because we hold a
947d30561c5SVitaly Wool 			 * reference to this page
948d30561c5SVitaly Wool 			 */
9492f1e5e4dSVitaly Wool 			z3fold_page_unlock(zhdr);
9509a001fc1SVitaly Wool 		} else {
9519a001fc1SVitaly Wool 			first_handle = encode_handle(zhdr, HEADLESS);
9529a001fc1SVitaly Wool 			last_handle = middle_handle = 0;
9532f1e5e4dSVitaly Wool 		}
9549a001fc1SVitaly Wool 
9559a001fc1SVitaly Wool 		/* Issue the eviction callback(s) */
9569a001fc1SVitaly Wool 		if (middle_handle) {
9579a001fc1SVitaly Wool 			ret = pool->ops->evict(pool, middle_handle);
9589a001fc1SVitaly Wool 			if (ret)
9599a001fc1SVitaly Wool 				goto next;
9609a001fc1SVitaly Wool 		}
9619a001fc1SVitaly Wool 		if (first_handle) {
9629a001fc1SVitaly Wool 			ret = pool->ops->evict(pool, first_handle);
9639a001fc1SVitaly Wool 			if (ret)
9649a001fc1SVitaly Wool 				goto next;
9659a001fc1SVitaly Wool 		}
9669a001fc1SVitaly Wool 		if (last_handle) {
9679a001fc1SVitaly Wool 			ret = pool->ops->evict(pool, last_handle);
9689a001fc1SVitaly Wool 			if (ret)
9699a001fc1SVitaly Wool 				goto next;
9709a001fc1SVitaly Wool 		}
9719a001fc1SVitaly Wool next:
9725a27aa82SVitaly Wool 		if (test_bit(PAGE_HEADLESS, &page->private)) {
9735a27aa82SVitaly Wool 			if (ret == 0) {
9745a27aa82SVitaly Wool 				free_z3fold_page(page);
975ca0246bbSVitaly Wool 				atomic64_dec(&pool->pages_nr);
9769a001fc1SVitaly Wool 				return 0;
9775a27aa82SVitaly Wool 			}
9786098d7e1SVitaly Wool 			spin_lock(&pool->lock);
9796098d7e1SVitaly Wool 			list_add(&page->lru, &pool->lru);
980d5567c9dSVitaly Wool 			spin_unlock(&pool->lock);
9816098d7e1SVitaly Wool 		} else {
9826098d7e1SVitaly Wool 			z3fold_page_lock(zhdr);
983ca0246bbSVitaly Wool 			clear_bit(PAGE_CLAIMED, &page->private);
9846098d7e1SVitaly Wool 			if (kref_put(&zhdr->refcount,
9856098d7e1SVitaly Wool 					release_z3fold_page_locked)) {
9866098d7e1SVitaly Wool 				atomic64_dec(&pool->pages_nr);
9875a27aa82SVitaly Wool 				return 0;
9885a27aa82SVitaly Wool 			}
9895a27aa82SVitaly Wool 			/*
9906098d7e1SVitaly Wool 			 * if we are here, the page is still not completely
9916098d7e1SVitaly Wool 			 * free. Take the global pool lock then to be able
9926098d7e1SVitaly Wool 			 * to add it back to the lru list
9935a27aa82SVitaly Wool 			 */
9946098d7e1SVitaly Wool 			spin_lock(&pool->lock);
9959a001fc1SVitaly Wool 			list_add(&page->lru, &pool->lru);
9966098d7e1SVitaly Wool 			spin_unlock(&pool->lock);
9976098d7e1SVitaly Wool 			z3fold_page_unlock(zhdr);
9986098d7e1SVitaly Wool 		}
9996098d7e1SVitaly Wool 
10006098d7e1SVitaly Wool 		/* We started off locked to we need to lock the pool back */
10016098d7e1SVitaly Wool 		spin_lock(&pool->lock);
10029a001fc1SVitaly Wool 	}
10039a001fc1SVitaly Wool 	spin_unlock(&pool->lock);
10049a001fc1SVitaly Wool 	return -EAGAIN;
10059a001fc1SVitaly Wool }
10069a001fc1SVitaly Wool 
10079a001fc1SVitaly Wool /**
10089a001fc1SVitaly Wool  * z3fold_map() - maps the allocation associated with the given handle
10099a001fc1SVitaly Wool  * @pool:	pool in which the allocation resides
10109a001fc1SVitaly Wool  * @handle:	handle associated with the allocation to be mapped
10119a001fc1SVitaly Wool  *
10129a001fc1SVitaly Wool  * Extracts the buddy number from handle and constructs the pointer to the
10139a001fc1SVitaly Wool  * correct starting chunk within the page.
10149a001fc1SVitaly Wool  *
10159a001fc1SVitaly Wool  * Returns: a pointer to the mapped allocation
10169a001fc1SVitaly Wool  */
10179a001fc1SVitaly Wool static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
10189a001fc1SVitaly Wool {
10199a001fc1SVitaly Wool 	struct z3fold_header *zhdr;
10209a001fc1SVitaly Wool 	struct page *page;
10219a001fc1SVitaly Wool 	void *addr;
10229a001fc1SVitaly Wool 	enum buddy buddy;
10239a001fc1SVitaly Wool 
10249a001fc1SVitaly Wool 	zhdr = handle_to_z3fold_header(handle);
10259a001fc1SVitaly Wool 	addr = zhdr;
10269a001fc1SVitaly Wool 	page = virt_to_page(zhdr);
10279a001fc1SVitaly Wool 
10289a001fc1SVitaly Wool 	if (test_bit(PAGE_HEADLESS, &page->private))
10299a001fc1SVitaly Wool 		goto out;
10309a001fc1SVitaly Wool 
10312f1e5e4dSVitaly Wool 	z3fold_page_lock(zhdr);
10329a001fc1SVitaly Wool 	buddy = handle_to_buddy(handle);
10339a001fc1SVitaly Wool 	switch (buddy) {
10349a001fc1SVitaly Wool 	case FIRST:
10359a001fc1SVitaly Wool 		addr += ZHDR_SIZE_ALIGNED;
10369a001fc1SVitaly Wool 		break;
10379a001fc1SVitaly Wool 	case MIDDLE:
10389a001fc1SVitaly Wool 		addr += zhdr->start_middle << CHUNK_SHIFT;
10399a001fc1SVitaly Wool 		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
10409a001fc1SVitaly Wool 		break;
10419a001fc1SVitaly Wool 	case LAST:
1042ca0246bbSVitaly Wool 		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
10439a001fc1SVitaly Wool 		break;
10449a001fc1SVitaly Wool 	default:
10459a001fc1SVitaly Wool 		pr_err("unknown buddy id %d\n", buddy);
10469a001fc1SVitaly Wool 		WARN_ON(1);
10479a001fc1SVitaly Wool 		addr = NULL;
10489a001fc1SVitaly Wool 		break;
10499a001fc1SVitaly Wool 	}
10502f1e5e4dSVitaly Wool 
10512f1e5e4dSVitaly Wool 	z3fold_page_unlock(zhdr);
10529a001fc1SVitaly Wool out:
10539a001fc1SVitaly Wool 	return addr;
10549a001fc1SVitaly Wool }
10559a001fc1SVitaly Wool 
10569a001fc1SVitaly Wool /**
10579a001fc1SVitaly Wool  * z3fold_unmap() - unmaps the allocation associated with the given handle
10589a001fc1SVitaly Wool  * @pool:	pool in which the allocation resides
10599a001fc1SVitaly Wool  * @handle:	handle associated with the allocation to be unmapped
10609a001fc1SVitaly Wool  */
10619a001fc1SVitaly Wool static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
10629a001fc1SVitaly Wool {
10639a001fc1SVitaly Wool 	struct z3fold_header *zhdr;
10649a001fc1SVitaly Wool 	struct page *page;
10659a001fc1SVitaly Wool 	enum buddy buddy;
10669a001fc1SVitaly Wool 
10679a001fc1SVitaly Wool 	zhdr = handle_to_z3fold_header(handle);
10689a001fc1SVitaly Wool 	page = virt_to_page(zhdr);
10699a001fc1SVitaly Wool 
10702f1e5e4dSVitaly Wool 	if (test_bit(PAGE_HEADLESS, &page->private))
10719a001fc1SVitaly Wool 		return;
10729a001fc1SVitaly Wool 
10732f1e5e4dSVitaly Wool 	z3fold_page_lock(zhdr);
10749a001fc1SVitaly Wool 	buddy = handle_to_buddy(handle);
10759a001fc1SVitaly Wool 	if (buddy == MIDDLE)
10769a001fc1SVitaly Wool 		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
10772f1e5e4dSVitaly Wool 	z3fold_page_unlock(zhdr);
10789a001fc1SVitaly Wool }
10799a001fc1SVitaly Wool 
10809a001fc1SVitaly Wool /**
10819a001fc1SVitaly Wool  * z3fold_get_pool_size() - gets the z3fold pool size in pages
10829a001fc1SVitaly Wool  * @pool:	pool whose size is being queried
10839a001fc1SVitaly Wool  *
108412d59ae6SVitaly Wool  * Returns: size in pages of the given pool.
10859a001fc1SVitaly Wool  */
10869a001fc1SVitaly Wool static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
10879a001fc1SVitaly Wool {
108812d59ae6SVitaly Wool 	return atomic64_read(&pool->pages_nr);
10899a001fc1SVitaly Wool }
10909a001fc1SVitaly Wool 
10919a001fc1SVitaly Wool /*****************
10929a001fc1SVitaly Wool  * zpool
10939a001fc1SVitaly Wool  ****************/
10949a001fc1SVitaly Wool 
10959a001fc1SVitaly Wool static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
10969a001fc1SVitaly Wool {
10979a001fc1SVitaly Wool 	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
10989a001fc1SVitaly Wool 		return pool->zpool_ops->evict(pool->zpool, handle);
10999a001fc1SVitaly Wool 	else
11009a001fc1SVitaly Wool 		return -ENOENT;
11019a001fc1SVitaly Wool }
11029a001fc1SVitaly Wool 
11039a001fc1SVitaly Wool static const struct z3fold_ops z3fold_zpool_ops = {
11049a001fc1SVitaly Wool 	.evict =	z3fold_zpool_evict
11059a001fc1SVitaly Wool };
11069a001fc1SVitaly Wool 
11079a001fc1SVitaly Wool static void *z3fold_zpool_create(const char *name, gfp_t gfp,
11089a001fc1SVitaly Wool 			       const struct zpool_ops *zpool_ops,
11099a001fc1SVitaly Wool 			       struct zpool *zpool)
11109a001fc1SVitaly Wool {
11119a001fc1SVitaly Wool 	struct z3fold_pool *pool;
11129a001fc1SVitaly Wool 
1113d30561c5SVitaly Wool 	pool = z3fold_create_pool(name, gfp,
1114d30561c5SVitaly Wool 				zpool_ops ? &z3fold_zpool_ops : NULL);
11159a001fc1SVitaly Wool 	if (pool) {
11169a001fc1SVitaly Wool 		pool->zpool = zpool;
11179a001fc1SVitaly Wool 		pool->zpool_ops = zpool_ops;
11189a001fc1SVitaly Wool 	}
11199a001fc1SVitaly Wool 	return pool;
11209a001fc1SVitaly Wool }
11219a001fc1SVitaly Wool 
/* zpool ->destroy hook: tear down the z3fold pool behind the opaque pointer. */
static void z3fold_zpool_destroy(void *pool)
{
	struct z3fold_pool *z3pool = pool;

	z3fold_destroy_pool(z3pool);
}
11269a001fc1SVitaly Wool 
11279a001fc1SVitaly Wool static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
11289a001fc1SVitaly Wool 			unsigned long *handle)
11299a001fc1SVitaly Wool {
11309a001fc1SVitaly Wool 	return z3fold_alloc(pool, size, gfp, handle);
11319a001fc1SVitaly Wool }
/* zpool ->free hook: thin wrapper around z3fold_free(). */
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	struct z3fold_pool *z3pool = pool;

	z3fold_free(z3pool, handle);
}
11369a001fc1SVitaly Wool 
11379a001fc1SVitaly Wool static int z3fold_zpool_shrink(void *pool, unsigned int pages,
11389a001fc1SVitaly Wool 			unsigned int *reclaimed)
11399a001fc1SVitaly Wool {
11409a001fc1SVitaly Wool 	unsigned int total = 0;
11419a001fc1SVitaly Wool 	int ret = -EINVAL;
11429a001fc1SVitaly Wool 
11439a001fc1SVitaly Wool 	while (total < pages) {
11449a001fc1SVitaly Wool 		ret = z3fold_reclaim_page(pool, 8);
11459a001fc1SVitaly Wool 		if (ret < 0)
11469a001fc1SVitaly Wool 			break;
11479a001fc1SVitaly Wool 		total++;
11489a001fc1SVitaly Wool 	}
11499a001fc1SVitaly Wool 
11509a001fc1SVitaly Wool 	if (reclaimed)
11519a001fc1SVitaly Wool 		*reclaimed = total;
11529a001fc1SVitaly Wool 
11539a001fc1SVitaly Wool 	return ret;
11549a001fc1SVitaly Wool }
11559a001fc1SVitaly Wool 
11569a001fc1SVitaly Wool static void *z3fold_zpool_map(void *pool, unsigned long handle,
11579a001fc1SVitaly Wool 			enum zpool_mapmode mm)
11589a001fc1SVitaly Wool {
11599a001fc1SVitaly Wool 	return z3fold_map(pool, handle);
11609a001fc1SVitaly Wool }
/* zpool ->unmap hook: thin wrapper around z3fold_unmap(). */
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	struct z3fold_pool *z3pool = pool;

	z3fold_unmap(z3pool, handle);
}
11659a001fc1SVitaly Wool 
11669a001fc1SVitaly Wool static u64 z3fold_zpool_total_size(void *pool)
11679a001fc1SVitaly Wool {
11689a001fc1SVitaly Wool 	return z3fold_get_pool_size(pool) * PAGE_SIZE;
11699a001fc1SVitaly Wool }
11709a001fc1SVitaly Wool 
11719a001fc1SVitaly Wool static struct zpool_driver z3fold_zpool_driver = {
11729a001fc1SVitaly Wool 	.type =		"z3fold",
11739a001fc1SVitaly Wool 	.owner =	THIS_MODULE,
11749a001fc1SVitaly Wool 	.create =	z3fold_zpool_create,
11759a001fc1SVitaly Wool 	.destroy =	z3fold_zpool_destroy,
11769a001fc1SVitaly Wool 	.malloc =	z3fold_zpool_malloc,
11779a001fc1SVitaly Wool 	.free =		z3fold_zpool_free,
11789a001fc1SVitaly Wool 	.shrink =	z3fold_zpool_shrink,
11799a001fc1SVitaly Wool 	.map =		z3fold_zpool_map,
11809a001fc1SVitaly Wool 	.unmap =	z3fold_zpool_unmap,
11819a001fc1SVitaly Wool 	.total_size =	z3fold_zpool_total_size,
11829a001fc1SVitaly Wool };
11839a001fc1SVitaly Wool 
11849a001fc1SVitaly Wool MODULE_ALIAS("zpool-z3fold");
11859a001fc1SVitaly Wool 
11869a001fc1SVitaly Wool static int __init init_z3fold(void)
11879a001fc1SVitaly Wool {
1188ede93213SVitaly Wool 	/* Make sure the z3fold header is not larger than the page size */
1189ede93213SVitaly Wool 	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
11909a001fc1SVitaly Wool 	zpool_register_driver(&z3fold_zpool_driver);
11919a001fc1SVitaly Wool 
11929a001fc1SVitaly Wool 	return 0;
11939a001fc1SVitaly Wool }
11949a001fc1SVitaly Wool 
11959a001fc1SVitaly Wool static void __exit exit_z3fold(void)
11969a001fc1SVitaly Wool {
11979a001fc1SVitaly Wool 	zpool_unregister_driver(&z3fold_zpool_driver);
11989a001fc1SVitaly Wool }
11999a001fc1SVitaly Wool 
12009a001fc1SVitaly Wool module_init(init_z3fold);
12019a001fc1SVitaly Wool module_exit(exit_z3fold);
12029a001fc1SVitaly Wool 
12039a001fc1SVitaly Wool MODULE_LICENSE("GPL");
12049a001fc1SVitaly Wool MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
12059a001fc1SVitaly Wool MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");
1206