xref: /linux/mm/zswap.c (revision 2b2811178e85553405b86e3fe78357b9b95889ce)
1*2b281117SSeth Jennings /*
2*2b281117SSeth Jennings  * zswap.c - zswap driver file
3*2b281117SSeth Jennings  *
4*2b281117SSeth Jennings  * zswap is a backend for frontswap that takes pages that are in the process
5*2b281117SSeth Jennings  * of being swapped out and attempts to compress and store them in a
6*2b281117SSeth Jennings  * RAM-based memory pool.  This can result in a significant I/O reduction on
7*2b281117SSeth Jennings  * the swap device and, in the case where decompressing from RAM is faster
8*2b281117SSeth Jennings  * than reading from the swap device, can also improve workload performance.
9*2b281117SSeth Jennings  *
10*2b281117SSeth Jennings  * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
11*2b281117SSeth Jennings  *
12*2b281117SSeth Jennings  * This program is free software; you can redistribute it and/or
13*2b281117SSeth Jennings  * modify it under the terms of the GNU General Public License
14*2b281117SSeth Jennings  * as published by the Free Software Foundation; either version 2
15*2b281117SSeth Jennings  * of the License, or (at your option) any later version.
16*2b281117SSeth Jennings  *
17*2b281117SSeth Jennings  * This program is distributed in the hope that it will be useful,
18*2b281117SSeth Jennings  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19*2b281117SSeth Jennings  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20*2b281117SSeth Jennings  * GNU General Public License for more details.
21*2b281117SSeth Jennings */
22*2b281117SSeth Jennings 
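/*
 * Rough data flow (descriptive note only): when a page is about to be
 * written to swap, frontswap calls zswap_frontswap_store(), which
 * compresses the page into a zbud-managed pool and records it in a
 * per-swap-type red-black tree.  A later swap-in hits
 * zswap_frontswap_load(), which decompresses it straight from RAM.
 * Once the pool exceeds max_pool_percent of RAM, stores trigger zbud
 * reclaim, which evicts allocations via zswap_writeback_entry() and
 * pushes them out to the real swap device.
 */
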
23*2b281117SSeth Jennings #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
24*2b281117SSeth Jennings 
25*2b281117SSeth Jennings #include <linux/module.h>
26*2b281117SSeth Jennings #include <linux/cpu.h>
27*2b281117SSeth Jennings #include <linux/highmem.h>
28*2b281117SSeth Jennings #include <linux/slab.h>
29*2b281117SSeth Jennings #include <linux/spinlock.h>
30*2b281117SSeth Jennings #include <linux/types.h>
31*2b281117SSeth Jennings #include <linux/atomic.h>
32*2b281117SSeth Jennings #include <linux/frontswap.h>
33*2b281117SSeth Jennings #include <linux/rbtree.h>
34*2b281117SSeth Jennings #include <linux/swap.h>
35*2b281117SSeth Jennings #include <linux/crypto.h>
36*2b281117SSeth Jennings #include <linux/mempool.h>
37*2b281117SSeth Jennings #include <linux/zbud.h>
38*2b281117SSeth Jennings 
39*2b281117SSeth Jennings #include <linux/mm_types.h>
40*2b281117SSeth Jennings #include <linux/page-flags.h>
41*2b281117SSeth Jennings #include <linux/swapops.h>
42*2b281117SSeth Jennings #include <linux/writeback.h>
43*2b281117SSeth Jennings #include <linux/pagemap.h>
44*2b281117SSeth Jennings 
45*2b281117SSeth Jennings /*********************************
46*2b281117SSeth Jennings * statistics
47*2b281117SSeth Jennings **********************************/
48*2b281117SSeth Jennings /* Number of memory pages used by the compressed pool */
49*2b281117SSeth Jennings static u64 zswap_pool_pages;
50*2b281117SSeth Jennings /* The number of compressed pages currently stored in zswap */
51*2b281117SSeth Jennings static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
52*2b281117SSeth Jennings 
53*2b281117SSeth Jennings /*
54*2b281117SSeth Jennings  * The statistics below are not protected from concurrent access for
 55*2b281117SSeth Jennings  * performance reasons so they may not be 100% accurate.  However,
56*2b281117SSeth Jennings  * they do provide useful information on roughly how many times a
57*2b281117SSeth Jennings  * certain event is occurring.
58*2b281117SSeth Jennings */
59*2b281117SSeth Jennings 
60*2b281117SSeth Jennings /* Pool limit was hit (see zswap_max_pool_percent) */
61*2b281117SSeth Jennings static u64 zswap_pool_limit_hit;
62*2b281117SSeth Jennings /* Pages written back when pool limit was reached */
63*2b281117SSeth Jennings static u64 zswap_written_back_pages;
64*2b281117SSeth Jennings /* Store failed due to a reclaim failure after pool limit was reached */
65*2b281117SSeth Jennings static u64 zswap_reject_reclaim_fail;
66*2b281117SSeth Jennings /* Compressed page was too big for the allocator to (optimally) store */
67*2b281117SSeth Jennings static u64 zswap_reject_compress_poor;
68*2b281117SSeth Jennings /* Store failed because underlying allocator could not get memory */
69*2b281117SSeth Jennings static u64 zswap_reject_alloc_fail;
70*2b281117SSeth Jennings /* Store failed because the entry metadata could not be allocated (rare) */
71*2b281117SSeth Jennings static u64 zswap_reject_kmemcache_fail;
72*2b281117SSeth Jennings /* Duplicate store was encountered (rare) */
73*2b281117SSeth Jennings static u64 zswap_duplicate_entry;
74*2b281117SSeth Jennings 
75*2b281117SSeth Jennings /*********************************
76*2b281117SSeth Jennings * tunables
77*2b281117SSeth Jennings **********************************/
78*2b281117SSeth Jennings /* Enable/disable zswap (disabled by default, fixed at boot for now) */
79*2b281117SSeth Jennings static bool zswap_enabled __read_mostly;
80*2b281117SSeth Jennings module_param_named(enabled, zswap_enabled, bool, 0);
81*2b281117SSeth Jennings 
82*2b281117SSeth Jennings /* Compressor to be used by zswap (fixed at boot for now) */
83*2b281117SSeth Jennings #define ZSWAP_COMPRESSOR_DEFAULT "lzo"
84*2b281117SSeth Jennings static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
85*2b281117SSeth Jennings module_param_named(compressor, zswap_compressor, charp, 0);
86*2b281117SSeth Jennings 
87*2b281117SSeth Jennings /* The maximum percentage of memory that the compressed pool can occupy */
88*2b281117SSeth Jennings static unsigned int zswap_max_pool_percent = 20;
89*2b281117SSeth Jennings module_param_named(max_pool_percent,
90*2b281117SSeth Jennings 			zswap_max_pool_percent, uint, 0644);
91*2b281117SSeth Jennings 
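/*
 * Usage note (illustrative, not part of the driver): with a kernel built
 * with CONFIG_ZSWAP, these parameters can be set on the kernel command
 * line, e.g.:
 *
 *	zswap.enabled=1 zswap.compressor=lzo zswap.max_pool_percent=20
 *
 * max_pool_percent is also writable at runtime via
 * /sys/module/zswap/parameters/max_pool_percent (permission 0644 above),
 * while enabled and compressor use permission 0, are not exported to
 * sysfs, and are effectively fixed at boot.
 */
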
92*2b281117SSeth Jennings /*********************************
93*2b281117SSeth Jennings * compression functions
94*2b281117SSeth Jennings **********************************/
95*2b281117SSeth Jennings /* per-cpu compression transforms */
96*2b281117SSeth Jennings static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
97*2b281117SSeth Jennings 
98*2b281117SSeth Jennings enum comp_op {
99*2b281117SSeth Jennings 	ZSWAP_COMPOP_COMPRESS,
100*2b281117SSeth Jennings 	ZSWAP_COMPOP_DECOMPRESS
101*2b281117SSeth Jennings };
102*2b281117SSeth Jennings 
103*2b281117SSeth Jennings static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
104*2b281117SSeth Jennings 				u8 *dst, unsigned int *dlen)
105*2b281117SSeth Jennings {
106*2b281117SSeth Jennings 	struct crypto_comp *tfm;
107*2b281117SSeth Jennings 	int ret;
108*2b281117SSeth Jennings 
109*2b281117SSeth Jennings 	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
110*2b281117SSeth Jennings 	switch (op) {
111*2b281117SSeth Jennings 	case ZSWAP_COMPOP_COMPRESS:
112*2b281117SSeth Jennings 		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
113*2b281117SSeth Jennings 		break;
114*2b281117SSeth Jennings 	case ZSWAP_COMPOP_DECOMPRESS:
115*2b281117SSeth Jennings 		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
116*2b281117SSeth Jennings 		break;
117*2b281117SSeth Jennings 	default:
118*2b281117SSeth Jennings 		ret = -EINVAL;
119*2b281117SSeth Jennings 	}
120*2b281117SSeth Jennings 
121*2b281117SSeth Jennings 	put_cpu();
122*2b281117SSeth Jennings 	return ret;
123*2b281117SSeth Jennings }
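
/*
 * Usage sketch (illustrative only; the real callers are the store and
 * load paths below): *dlen is an in/out parameter - on input it holds
 * the destination buffer size, on successful return the number of bytes
 * actually produced, e.g.:
 *
 *	unsigned int dlen = PAGE_SIZE;
 *	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
 *	if (!ret)
 *		pr_debug("compressed one page down to %u bytes\n", dlen);
 */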
124*2b281117SSeth Jennings 
125*2b281117SSeth Jennings static int __init zswap_comp_init(void)
126*2b281117SSeth Jennings {
127*2b281117SSeth Jennings 	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
128*2b281117SSeth Jennings 		pr_info("%s compressor not available\n", zswap_compressor);
129*2b281117SSeth Jennings 		/* fall back to default compressor */
130*2b281117SSeth Jennings 		zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
131*2b281117SSeth Jennings 		if (!crypto_has_comp(zswap_compressor, 0, 0))
132*2b281117SSeth Jennings 			/* can't even load the default compressor */
133*2b281117SSeth Jennings 			return -ENODEV;
134*2b281117SSeth Jennings 	}
135*2b281117SSeth Jennings 	pr_info("using %s compressor\n", zswap_compressor);
136*2b281117SSeth Jennings 
137*2b281117SSeth Jennings 	/* alloc percpu transforms */
138*2b281117SSeth Jennings 	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
139*2b281117SSeth Jennings 	if (!zswap_comp_pcpu_tfms)
140*2b281117SSeth Jennings 		return -ENOMEM;
141*2b281117SSeth Jennings 	return 0;
142*2b281117SSeth Jennings }
143*2b281117SSeth Jennings 
144*2b281117SSeth Jennings static void zswap_comp_exit(void)
145*2b281117SSeth Jennings {
146*2b281117SSeth Jennings 	/* free percpu transforms */
147*2b281117SSeth Jennings 	if (zswap_comp_pcpu_tfms)
148*2b281117SSeth Jennings 		free_percpu(zswap_comp_pcpu_tfms);
149*2b281117SSeth Jennings }
150*2b281117SSeth Jennings 
151*2b281117SSeth Jennings /*********************************
152*2b281117SSeth Jennings * data structures
153*2b281117SSeth Jennings **********************************/
154*2b281117SSeth Jennings /*
155*2b281117SSeth Jennings  * struct zswap_entry
156*2b281117SSeth Jennings  *
157*2b281117SSeth Jennings  * This structure contains the metadata for tracking a single compressed
158*2b281117SSeth Jennings  * page within zswap.
159*2b281117SSeth Jennings  *
 160*2b281117SSeth Jennings  * rbnode - links the entry into the red-black tree for the appropriate swap type
 161*2b281117SSeth Jennings  * refcount - the number of outstanding references to the entry. This is needed
 162*2b281117SSeth Jennings  *            to protect against premature freeing of the entry by concurrent
 163*2b281117SSeth Jennings  *            calls to load, invalidate, and writeback.  The lock
 164*2b281117SSeth Jennings  *            for the zswap_tree structure that contains the entry must
 165*2b281117SSeth Jennings  *            be held while changing the refcount.  Since the lock must
 166*2b281117SSeth Jennings  *            be held, there is no reason to also make refcount atomic.
 167*2b281117SSeth Jennings  * offset - the swap offset for the entry.  Index into the red-black tree.
 168*2b281117SSeth Jennings  * handle - zbud allocation handle that stores the compressed page data
169*2b281117SSeth Jennings  * length - the length in bytes of the compressed page data.  Needed during
170*2b281117SSeth Jennings  *           decompression
171*2b281117SSeth Jennings  */
172*2b281117SSeth Jennings struct zswap_entry {
173*2b281117SSeth Jennings 	struct rb_node rbnode;
174*2b281117SSeth Jennings 	pgoff_t offset;
175*2b281117SSeth Jennings 	int refcount;
176*2b281117SSeth Jennings 	unsigned int length;
177*2b281117SSeth Jennings 	unsigned long handle;
178*2b281117SSeth Jennings };
179*2b281117SSeth Jennings 
180*2b281117SSeth Jennings struct zswap_header {
181*2b281117SSeth Jennings 	swp_entry_t swpentry;
182*2b281117SSeth Jennings };
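
/*
 * Note: a struct zswap_header is written at the front of each zbud
 * allocation (see zswap_frontswap_store() below).  When zbud evicts an
 * allocation it hands zswap_writeback_entry() only the handle, so the
 * swp_entry_t stored here is what lets writeback recover which swap
 * type/offset the compressed data belongs to.
 */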
183*2b281117SSeth Jennings 
184*2b281117SSeth Jennings /*
185*2b281117SSeth Jennings  * The tree lock in the zswap_tree struct protects a few things:
186*2b281117SSeth Jennings  * - the rbtree
187*2b281117SSeth Jennings  * - the refcount field of each entry in the tree
188*2b281117SSeth Jennings  */
189*2b281117SSeth Jennings struct zswap_tree {
190*2b281117SSeth Jennings 	struct rb_root rbroot;
191*2b281117SSeth Jennings 	spinlock_t lock;
192*2b281117SSeth Jennings 	struct zbud_pool *pool;
193*2b281117SSeth Jennings };
194*2b281117SSeth Jennings 
195*2b281117SSeth Jennings static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
196*2b281117SSeth Jennings 
197*2b281117SSeth Jennings /*********************************
198*2b281117SSeth Jennings * zswap entry functions
199*2b281117SSeth Jennings **********************************/
200*2b281117SSeth Jennings static struct kmem_cache *zswap_entry_cache;
201*2b281117SSeth Jennings 
202*2b281117SSeth Jennings static int zswap_entry_cache_create(void)
203*2b281117SSeth Jennings {
204*2b281117SSeth Jennings 	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
205*2b281117SSeth Jennings 	return (zswap_entry_cache == NULL);
206*2b281117SSeth Jennings }
207*2b281117SSeth Jennings 
 208*2b281117SSeth Jennings static void zswap_entry_cache_destroy(void)
209*2b281117SSeth Jennings {
210*2b281117SSeth Jennings 	kmem_cache_destroy(zswap_entry_cache);
211*2b281117SSeth Jennings }
212*2b281117SSeth Jennings 
213*2b281117SSeth Jennings static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
214*2b281117SSeth Jennings {
215*2b281117SSeth Jennings 	struct zswap_entry *entry;
216*2b281117SSeth Jennings 	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
217*2b281117SSeth Jennings 	if (!entry)
218*2b281117SSeth Jennings 		return NULL;
219*2b281117SSeth Jennings 	entry->refcount = 1;
220*2b281117SSeth Jennings 	return entry;
221*2b281117SSeth Jennings }
222*2b281117SSeth Jennings 
223*2b281117SSeth Jennings static void zswap_entry_cache_free(struct zswap_entry *entry)
224*2b281117SSeth Jennings {
225*2b281117SSeth Jennings 	kmem_cache_free(zswap_entry_cache, entry);
226*2b281117SSeth Jennings }
227*2b281117SSeth Jennings 
228*2b281117SSeth Jennings /* caller must hold the tree lock */
229*2b281117SSeth Jennings static void zswap_entry_get(struct zswap_entry *entry)
230*2b281117SSeth Jennings {
231*2b281117SSeth Jennings 	entry->refcount++;
232*2b281117SSeth Jennings }
233*2b281117SSeth Jennings 
234*2b281117SSeth Jennings /* caller must hold the tree lock */
235*2b281117SSeth Jennings static int zswap_entry_put(struct zswap_entry *entry)
236*2b281117SSeth Jennings {
237*2b281117SSeth Jennings 	entry->refcount--;
238*2b281117SSeth Jennings 	return entry->refcount;
239*2b281117SSeth Jennings }
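
/*
 * Illustrative sketch (simplified; the real users are the load, writeback
 * and invalidate paths below) of how the refcount is meant to be used
 * under the tree lock:
 *
 *	spin_lock(&tree->lock);
 *	entry = zswap_rb_search(&tree->rbroot, offset);
 *	if (entry)
 *		zswap_entry_get(entry);
 *	spin_unlock(&tree->lock);
 *
 *	... work on entry without holding the lock ...
 *
 *	spin_lock(&tree->lock);
 *	refcount = zswap_entry_put(entry);
 *	spin_unlock(&tree->lock);
 *	if (!refcount)
 *		zswap_free_entry(tree, entry);	// last reference dropped
 */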
240*2b281117SSeth Jennings 
241*2b281117SSeth Jennings /*********************************
242*2b281117SSeth Jennings * rbtree functions
243*2b281117SSeth Jennings **********************************/
244*2b281117SSeth Jennings static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
245*2b281117SSeth Jennings {
246*2b281117SSeth Jennings 	struct rb_node *node = root->rb_node;
247*2b281117SSeth Jennings 	struct zswap_entry *entry;
248*2b281117SSeth Jennings 
249*2b281117SSeth Jennings 	while (node) {
250*2b281117SSeth Jennings 		entry = rb_entry(node, struct zswap_entry, rbnode);
251*2b281117SSeth Jennings 		if (entry->offset > offset)
252*2b281117SSeth Jennings 			node = node->rb_left;
253*2b281117SSeth Jennings 		else if (entry->offset < offset)
254*2b281117SSeth Jennings 			node = node->rb_right;
255*2b281117SSeth Jennings 		else
256*2b281117SSeth Jennings 			return entry;
257*2b281117SSeth Jennings 	}
258*2b281117SSeth Jennings 	return NULL;
259*2b281117SSeth Jennings }
260*2b281117SSeth Jennings 
261*2b281117SSeth Jennings /*
 262*2b281117SSeth Jennings  * In the case that an entry with the same offset is found, a pointer to
263*2b281117SSeth Jennings  * the existing entry is stored in dupentry and the function returns -EEXIST
264*2b281117SSeth Jennings  */
265*2b281117SSeth Jennings static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
266*2b281117SSeth Jennings 			struct zswap_entry **dupentry)
267*2b281117SSeth Jennings {
268*2b281117SSeth Jennings 	struct rb_node **link = &root->rb_node, *parent = NULL;
269*2b281117SSeth Jennings 	struct zswap_entry *myentry;
270*2b281117SSeth Jennings 
271*2b281117SSeth Jennings 	while (*link) {
272*2b281117SSeth Jennings 		parent = *link;
273*2b281117SSeth Jennings 		myentry = rb_entry(parent, struct zswap_entry, rbnode);
274*2b281117SSeth Jennings 		if (myentry->offset > entry->offset)
275*2b281117SSeth Jennings 			link = &(*link)->rb_left;
276*2b281117SSeth Jennings 		else if (myentry->offset < entry->offset)
277*2b281117SSeth Jennings 			link = &(*link)->rb_right;
278*2b281117SSeth Jennings 		else {
279*2b281117SSeth Jennings 			*dupentry = myentry;
280*2b281117SSeth Jennings 			return -EEXIST;
281*2b281117SSeth Jennings 		}
282*2b281117SSeth Jennings 	}
283*2b281117SSeth Jennings 	rb_link_node(&entry->rbnode, parent, link);
284*2b281117SSeth Jennings 	rb_insert_color(&entry->rbnode, root);
285*2b281117SSeth Jennings 	return 0;
286*2b281117SSeth Jennings }
287*2b281117SSeth Jennings 
288*2b281117SSeth Jennings /*********************************
289*2b281117SSeth Jennings * per-cpu code
290*2b281117SSeth Jennings **********************************/
291*2b281117SSeth Jennings static DEFINE_PER_CPU(u8 *, zswap_dstmem);
292*2b281117SSeth Jennings 
293*2b281117SSeth Jennings static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
294*2b281117SSeth Jennings {
295*2b281117SSeth Jennings 	struct crypto_comp *tfm;
296*2b281117SSeth Jennings 	u8 *dst;
297*2b281117SSeth Jennings 
298*2b281117SSeth Jennings 	switch (action) {
299*2b281117SSeth Jennings 	case CPU_UP_PREPARE:
300*2b281117SSeth Jennings 		tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
301*2b281117SSeth Jennings 		if (IS_ERR(tfm)) {
302*2b281117SSeth Jennings 			pr_err("can't allocate compressor transform\n");
303*2b281117SSeth Jennings 			return NOTIFY_BAD;
304*2b281117SSeth Jennings 		}
305*2b281117SSeth Jennings 		*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
306*2b281117SSeth Jennings 		dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
307*2b281117SSeth Jennings 		if (!dst) {
308*2b281117SSeth Jennings 			pr_err("can't allocate compressor buffer\n");
309*2b281117SSeth Jennings 			crypto_free_comp(tfm);
310*2b281117SSeth Jennings 			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
311*2b281117SSeth Jennings 			return NOTIFY_BAD;
312*2b281117SSeth Jennings 		}
313*2b281117SSeth Jennings 		per_cpu(zswap_dstmem, cpu) = dst;
314*2b281117SSeth Jennings 		break;
315*2b281117SSeth Jennings 	case CPU_DEAD:
316*2b281117SSeth Jennings 	case CPU_UP_CANCELED:
317*2b281117SSeth Jennings 		tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
318*2b281117SSeth Jennings 		if (tfm) {
319*2b281117SSeth Jennings 			crypto_free_comp(tfm);
320*2b281117SSeth Jennings 			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
321*2b281117SSeth Jennings 		}
322*2b281117SSeth Jennings 		dst = per_cpu(zswap_dstmem, cpu);
323*2b281117SSeth Jennings 		kfree(dst);
324*2b281117SSeth Jennings 		per_cpu(zswap_dstmem, cpu) = NULL;
325*2b281117SSeth Jennings 		break;
326*2b281117SSeth Jennings 	default:
327*2b281117SSeth Jennings 		break;
328*2b281117SSeth Jennings 	}
329*2b281117SSeth Jennings 	return NOTIFY_OK;
330*2b281117SSeth Jennings }
331*2b281117SSeth Jennings 
332*2b281117SSeth Jennings static int zswap_cpu_notifier(struct notifier_block *nb,
333*2b281117SSeth Jennings 				unsigned long action, void *pcpu)
334*2b281117SSeth Jennings {
335*2b281117SSeth Jennings 	unsigned long cpu = (unsigned long)pcpu;
336*2b281117SSeth Jennings 	return __zswap_cpu_notifier(action, cpu);
337*2b281117SSeth Jennings }
338*2b281117SSeth Jennings 
339*2b281117SSeth Jennings static struct notifier_block zswap_cpu_notifier_block = {
340*2b281117SSeth Jennings 	.notifier_call = zswap_cpu_notifier
341*2b281117SSeth Jennings };
342*2b281117SSeth Jennings 
343*2b281117SSeth Jennings static int zswap_cpu_init(void)
344*2b281117SSeth Jennings {
345*2b281117SSeth Jennings 	unsigned long cpu;
346*2b281117SSeth Jennings 
347*2b281117SSeth Jennings 	get_online_cpus();
348*2b281117SSeth Jennings 	for_each_online_cpu(cpu)
349*2b281117SSeth Jennings 		if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
350*2b281117SSeth Jennings 			goto cleanup;
351*2b281117SSeth Jennings 	register_cpu_notifier(&zswap_cpu_notifier_block);
352*2b281117SSeth Jennings 	put_online_cpus();
353*2b281117SSeth Jennings 	return 0;
354*2b281117SSeth Jennings 
355*2b281117SSeth Jennings cleanup:
356*2b281117SSeth Jennings 	for_each_online_cpu(cpu)
357*2b281117SSeth Jennings 		__zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
358*2b281117SSeth Jennings 	put_online_cpus();
359*2b281117SSeth Jennings 	return -ENOMEM;
360*2b281117SSeth Jennings }
361*2b281117SSeth Jennings 
362*2b281117SSeth Jennings /*********************************
363*2b281117SSeth Jennings * helpers
364*2b281117SSeth Jennings **********************************/
365*2b281117SSeth Jennings static bool zswap_is_full(void)
366*2b281117SSeth Jennings {
367*2b281117SSeth Jennings 	return (totalram_pages * zswap_max_pool_percent / 100 <
368*2b281117SSeth Jennings 		zswap_pool_pages);
369*2b281117SSeth Jennings }
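
/*
 * For example (illustrative numbers): on a machine with 1 GiB of RAM and
 * 4 KiB pages, totalram_pages is roughly 262144; with the default
 * max_pool_percent of 20 the limit is 262144 * 20 / 100 = 52428 pages,
 * so zswap_is_full() returns true once the zbud pool holds more than
 * about 204 MiB of compressed data.
 */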
370*2b281117SSeth Jennings 
371*2b281117SSeth Jennings /*
 372*2b281117SSeth Jennings  * Carries out the common pattern of freeing an entry's zbud allocation,
373*2b281117SSeth Jennings  * freeing the entry itself, and decrementing the number of stored pages.
374*2b281117SSeth Jennings  */
375*2b281117SSeth Jennings static void zswap_free_entry(struct zswap_tree *tree, struct zswap_entry *entry)
376*2b281117SSeth Jennings {
377*2b281117SSeth Jennings 	zbud_free(tree->pool, entry->handle);
378*2b281117SSeth Jennings 	zswap_entry_cache_free(entry);
379*2b281117SSeth Jennings 	atomic_dec(&zswap_stored_pages);
380*2b281117SSeth Jennings 	zswap_pool_pages = zbud_get_pool_size(tree->pool);
381*2b281117SSeth Jennings }
382*2b281117SSeth Jennings 
383*2b281117SSeth Jennings /*********************************
384*2b281117SSeth Jennings * writeback code
385*2b281117SSeth Jennings **********************************/
386*2b281117SSeth Jennings /* return enum for zswap_get_swap_cache_page */
387*2b281117SSeth Jennings enum zswap_get_swap_ret {
388*2b281117SSeth Jennings 	ZSWAP_SWAPCACHE_NEW,
389*2b281117SSeth Jennings 	ZSWAP_SWAPCACHE_EXIST,
390*2b281117SSeth Jennings 	ZSWAP_SWAPCACHE_NOMEM
391*2b281117SSeth Jennings };
392*2b281117SSeth Jennings 
393*2b281117SSeth Jennings /*
394*2b281117SSeth Jennings  * zswap_get_swap_cache_page
395*2b281117SSeth Jennings  *
 396*2b281117SSeth Jennings  * This is an adaptation of read_swap_cache_async()
397*2b281117SSeth Jennings  *
398*2b281117SSeth Jennings  * This function tries to find a page with the given swap entry
399*2b281117SSeth Jennings  * in the swapper_space address space (the swap cache).  If the page
400*2b281117SSeth Jennings  * is found, it is returned in retpage.  Otherwise, a page is allocated,
401*2b281117SSeth Jennings  * added to the swap cache, and returned in retpage.
402*2b281117SSeth Jennings  *
 403*2b281117SSeth Jennings  * On success, the swap cache page is returned in retpage.
 404*2b281117SSeth Jennings  * Returns ZSWAP_SWAPCACHE_EXIST if the page was already in the swap cache (page is not locked)
 405*2b281117SSeth Jennings  * Returns ZSWAP_SWAPCACHE_NEW if a new page was allocated and needs to be populated (page is locked)
 406*2b281117SSeth Jennings  * Returns ZSWAP_SWAPCACHE_NOMEM on failure
407*2b281117SSeth Jennings  */
408*2b281117SSeth Jennings static int zswap_get_swap_cache_page(swp_entry_t entry,
409*2b281117SSeth Jennings 				struct page **retpage)
410*2b281117SSeth Jennings {
411*2b281117SSeth Jennings 	struct page *found_page, *new_page = NULL;
412*2b281117SSeth Jennings 	struct address_space *swapper_space = &swapper_spaces[swp_type(entry)];
413*2b281117SSeth Jennings 	int err;
414*2b281117SSeth Jennings 
415*2b281117SSeth Jennings 	*retpage = NULL;
416*2b281117SSeth Jennings 	do {
417*2b281117SSeth Jennings 		/*
418*2b281117SSeth Jennings 		 * First check the swap cache.  Since this is normally
419*2b281117SSeth Jennings 		 * called after lookup_swap_cache() failed, re-calling
420*2b281117SSeth Jennings 		 * that would confuse statistics.
421*2b281117SSeth Jennings 		 */
422*2b281117SSeth Jennings 		found_page = find_get_page(swapper_space, entry.val);
423*2b281117SSeth Jennings 		if (found_page)
424*2b281117SSeth Jennings 			break;
425*2b281117SSeth Jennings 
426*2b281117SSeth Jennings 		/*
427*2b281117SSeth Jennings 		 * Get a new page to read into from swap.
428*2b281117SSeth Jennings 		 */
429*2b281117SSeth Jennings 		if (!new_page) {
430*2b281117SSeth Jennings 			new_page = alloc_page(GFP_KERNEL);
431*2b281117SSeth Jennings 			if (!new_page)
432*2b281117SSeth Jennings 				break; /* Out of memory */
433*2b281117SSeth Jennings 		}
434*2b281117SSeth Jennings 
435*2b281117SSeth Jennings 		/*
436*2b281117SSeth Jennings 		 * call radix_tree_preload() while we can wait.
437*2b281117SSeth Jennings 		 */
438*2b281117SSeth Jennings 		err = radix_tree_preload(GFP_KERNEL);
439*2b281117SSeth Jennings 		if (err)
440*2b281117SSeth Jennings 			break;
441*2b281117SSeth Jennings 
442*2b281117SSeth Jennings 		/*
443*2b281117SSeth Jennings 		 * Swap entry may have been freed since our caller observed it.
444*2b281117SSeth Jennings 		 */
445*2b281117SSeth Jennings 		err = swapcache_prepare(entry);
446*2b281117SSeth Jennings 		if (err == -EEXIST) { /* seems racy */
447*2b281117SSeth Jennings 			radix_tree_preload_end();
448*2b281117SSeth Jennings 			continue;
449*2b281117SSeth Jennings 		}
450*2b281117SSeth Jennings 		if (err) { /* swp entry is obsolete ? */
451*2b281117SSeth Jennings 			radix_tree_preload_end();
452*2b281117SSeth Jennings 			break;
453*2b281117SSeth Jennings 		}
454*2b281117SSeth Jennings 
455*2b281117SSeth Jennings 		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
456*2b281117SSeth Jennings 		__set_page_locked(new_page);
457*2b281117SSeth Jennings 		SetPageSwapBacked(new_page);
458*2b281117SSeth Jennings 		err = __add_to_swap_cache(new_page, entry);
459*2b281117SSeth Jennings 		if (likely(!err)) {
460*2b281117SSeth Jennings 			radix_tree_preload_end();
461*2b281117SSeth Jennings 			lru_cache_add_anon(new_page);
462*2b281117SSeth Jennings 			*retpage = new_page;
463*2b281117SSeth Jennings 			return ZSWAP_SWAPCACHE_NEW;
464*2b281117SSeth Jennings 		}
465*2b281117SSeth Jennings 		radix_tree_preload_end();
466*2b281117SSeth Jennings 		ClearPageSwapBacked(new_page);
467*2b281117SSeth Jennings 		__clear_page_locked(new_page);
468*2b281117SSeth Jennings 		/*
469*2b281117SSeth Jennings 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
470*2b281117SSeth Jennings 		 * clear SWAP_HAS_CACHE flag.
471*2b281117SSeth Jennings 		 */
472*2b281117SSeth Jennings 		swapcache_free(entry, NULL);
473*2b281117SSeth Jennings 	} while (err != -ENOMEM);
474*2b281117SSeth Jennings 
475*2b281117SSeth Jennings 	if (new_page)
476*2b281117SSeth Jennings 		page_cache_release(new_page);
477*2b281117SSeth Jennings 	if (!found_page)
478*2b281117SSeth Jennings 		return ZSWAP_SWAPCACHE_NOMEM;
479*2b281117SSeth Jennings 	*retpage = found_page;
480*2b281117SSeth Jennings 	return ZSWAP_SWAPCACHE_EXIST;
481*2b281117SSeth Jennings }
482*2b281117SSeth Jennings 
483*2b281117SSeth Jennings /*
484*2b281117SSeth Jennings  * Attempts to free an entry by adding a page to the swap cache,
485*2b281117SSeth Jennings  * decompressing the entry data into the page, and issuing a
486*2b281117SSeth Jennings  * bio write to write the page back to the swap device.
487*2b281117SSeth Jennings  *
488*2b281117SSeth Jennings  * This can be thought of as a "resumed writeback" of the page
489*2b281117SSeth Jennings  * to the swap device.  We are basically resuming the same swap
490*2b281117SSeth Jennings  * writeback path that was intercepted with the frontswap_store()
491*2b281117SSeth Jennings  * in the first place.  After the page has been decompressed into
492*2b281117SSeth Jennings  * the swap cache, the compressed version stored by zswap can be
493*2b281117SSeth Jennings  * freed.
494*2b281117SSeth Jennings  */
495*2b281117SSeth Jennings static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
496*2b281117SSeth Jennings {
497*2b281117SSeth Jennings 	struct zswap_header *zhdr;
498*2b281117SSeth Jennings 	swp_entry_t swpentry;
499*2b281117SSeth Jennings 	struct zswap_tree *tree;
500*2b281117SSeth Jennings 	pgoff_t offset;
501*2b281117SSeth Jennings 	struct zswap_entry *entry;
502*2b281117SSeth Jennings 	struct page *page;
503*2b281117SSeth Jennings 	u8 *src, *dst;
504*2b281117SSeth Jennings 	unsigned int dlen;
505*2b281117SSeth Jennings 	int ret, refcount;
506*2b281117SSeth Jennings 	struct writeback_control wbc = {
507*2b281117SSeth Jennings 		.sync_mode = WB_SYNC_NONE,
508*2b281117SSeth Jennings 	};
509*2b281117SSeth Jennings 
510*2b281117SSeth Jennings 	/* extract swpentry from data */
511*2b281117SSeth Jennings 	zhdr = zbud_map(pool, handle);
512*2b281117SSeth Jennings 	swpentry = zhdr->swpentry; /* here */
513*2b281117SSeth Jennings 	zbud_unmap(pool, handle);
514*2b281117SSeth Jennings 	tree = zswap_trees[swp_type(swpentry)];
515*2b281117SSeth Jennings 	offset = swp_offset(swpentry);
516*2b281117SSeth Jennings 	BUG_ON(pool != tree->pool);
517*2b281117SSeth Jennings 
518*2b281117SSeth Jennings 	/* find and ref zswap entry */
519*2b281117SSeth Jennings 	spin_lock(&tree->lock);
520*2b281117SSeth Jennings 	entry = zswap_rb_search(&tree->rbroot, offset);
521*2b281117SSeth Jennings 	if (!entry) {
522*2b281117SSeth Jennings 		/* entry was invalidated */
523*2b281117SSeth Jennings 		spin_unlock(&tree->lock);
524*2b281117SSeth Jennings 		return 0;
525*2b281117SSeth Jennings 	}
526*2b281117SSeth Jennings 	zswap_entry_get(entry);
527*2b281117SSeth Jennings 	spin_unlock(&tree->lock);
528*2b281117SSeth Jennings 	BUG_ON(offset != entry->offset);
529*2b281117SSeth Jennings 
530*2b281117SSeth Jennings 	/* try to allocate swap cache page */
531*2b281117SSeth Jennings 	switch (zswap_get_swap_cache_page(swpentry, &page)) {
532*2b281117SSeth Jennings 	case ZSWAP_SWAPCACHE_NOMEM: /* no memory */
533*2b281117SSeth Jennings 		ret = -ENOMEM;
534*2b281117SSeth Jennings 		goto fail;
535*2b281117SSeth Jennings 
536*2b281117SSeth Jennings 	case ZSWAP_SWAPCACHE_EXIST: /* page is unlocked */
537*2b281117SSeth Jennings 		/* page is already in the swap cache, ignore for now */
538*2b281117SSeth Jennings 		page_cache_release(page);
539*2b281117SSeth Jennings 		ret = -EEXIST;
540*2b281117SSeth Jennings 		goto fail;
541*2b281117SSeth Jennings 
542*2b281117SSeth Jennings 	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
543*2b281117SSeth Jennings 		/* decompress */
544*2b281117SSeth Jennings 		dlen = PAGE_SIZE;
545*2b281117SSeth Jennings 		src = (u8 *)zbud_map(tree->pool, entry->handle) +
546*2b281117SSeth Jennings 			sizeof(struct zswap_header);
547*2b281117SSeth Jennings 		dst = kmap_atomic(page);
548*2b281117SSeth Jennings 		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
549*2b281117SSeth Jennings 				entry->length, dst, &dlen);
550*2b281117SSeth Jennings 		kunmap_atomic(dst);
551*2b281117SSeth Jennings 		zbud_unmap(tree->pool, entry->handle);
552*2b281117SSeth Jennings 		BUG_ON(ret);
553*2b281117SSeth Jennings 		BUG_ON(dlen != PAGE_SIZE);
554*2b281117SSeth Jennings 
555*2b281117SSeth Jennings 		/* page is up to date */
556*2b281117SSeth Jennings 		SetPageUptodate(page);
557*2b281117SSeth Jennings 	}
558*2b281117SSeth Jennings 
559*2b281117SSeth Jennings 	/* start writeback */
560*2b281117SSeth Jennings 	__swap_writepage(page, &wbc, end_swap_bio_write);
561*2b281117SSeth Jennings 	page_cache_release(page);
562*2b281117SSeth Jennings 	zswap_written_back_pages++;
563*2b281117SSeth Jennings 
564*2b281117SSeth Jennings 	spin_lock(&tree->lock);
565*2b281117SSeth Jennings 
566*2b281117SSeth Jennings 	/* drop local reference */
567*2b281117SSeth Jennings 	zswap_entry_put(entry);
568*2b281117SSeth Jennings 	/* drop the initial reference from entry creation */
569*2b281117SSeth Jennings 	refcount = zswap_entry_put(entry);
570*2b281117SSeth Jennings 
571*2b281117SSeth Jennings 	/*
572*2b281117SSeth Jennings 	 * There are three possible values for refcount here:
573*2b281117SSeth Jennings 	 * (1) refcount is 1, load is in progress, unlink from rbtree,
574*2b281117SSeth Jennings 	 *     load will free
575*2b281117SSeth Jennings 	 * (2) refcount is 0, (normal case) entry is valid,
576*2b281117SSeth Jennings 	 *     remove from rbtree and free entry
577*2b281117SSeth Jennings 	 * (3) refcount is -1, invalidate happened during writeback,
578*2b281117SSeth Jennings 	 *     free entry
579*2b281117SSeth Jennings 	 */
580*2b281117SSeth Jennings 	if (refcount >= 0) {
581*2b281117SSeth Jennings 		/* no invalidate yet, remove from rbtree */
582*2b281117SSeth Jennings 		rb_erase(&entry->rbnode, &tree->rbroot);
583*2b281117SSeth Jennings 	}
584*2b281117SSeth Jennings 	spin_unlock(&tree->lock);
585*2b281117SSeth Jennings 	if (refcount <= 0) {
586*2b281117SSeth Jennings 		/* free the entry */
587*2b281117SSeth Jennings 		zswap_free_entry(tree, entry);
588*2b281117SSeth Jennings 		return 0;
589*2b281117SSeth Jennings 	}
590*2b281117SSeth Jennings 	return -EAGAIN;
591*2b281117SSeth Jennings 
592*2b281117SSeth Jennings fail:
593*2b281117SSeth Jennings 	spin_lock(&tree->lock);
594*2b281117SSeth Jennings 	zswap_entry_put(entry);
595*2b281117SSeth Jennings 	spin_unlock(&tree->lock);
596*2b281117SSeth Jennings 	return ret;
597*2b281117SSeth Jennings }
598*2b281117SSeth Jennings 
599*2b281117SSeth Jennings /*********************************
600*2b281117SSeth Jennings * frontswap hooks
601*2b281117SSeth Jennings **********************************/
 602*2b281117SSeth Jennings /* attempts to compress and store a single page */
603*2b281117SSeth Jennings static int zswap_frontswap_store(unsigned type, pgoff_t offset,
604*2b281117SSeth Jennings 				struct page *page)
605*2b281117SSeth Jennings {
606*2b281117SSeth Jennings 	struct zswap_tree *tree = zswap_trees[type];
607*2b281117SSeth Jennings 	struct zswap_entry *entry, *dupentry;
608*2b281117SSeth Jennings 	int ret;
609*2b281117SSeth Jennings 	unsigned int dlen = PAGE_SIZE, len;
610*2b281117SSeth Jennings 	unsigned long handle;
611*2b281117SSeth Jennings 	char *buf;
612*2b281117SSeth Jennings 	u8 *src, *dst;
613*2b281117SSeth Jennings 	struct zswap_header *zhdr;
614*2b281117SSeth Jennings 
615*2b281117SSeth Jennings 	if (!tree) {
616*2b281117SSeth Jennings 		ret = -ENODEV;
617*2b281117SSeth Jennings 		goto reject;
618*2b281117SSeth Jennings 	}
619*2b281117SSeth Jennings 
620*2b281117SSeth Jennings 	/* reclaim space if needed */
621*2b281117SSeth Jennings 	if (zswap_is_full()) {
622*2b281117SSeth Jennings 		zswap_pool_limit_hit++;
623*2b281117SSeth Jennings 		if (zbud_reclaim_page(tree->pool, 8)) {
624*2b281117SSeth Jennings 			zswap_reject_reclaim_fail++;
625*2b281117SSeth Jennings 			ret = -ENOMEM;
626*2b281117SSeth Jennings 			goto reject;
627*2b281117SSeth Jennings 		}
628*2b281117SSeth Jennings 	}
629*2b281117SSeth Jennings 
630*2b281117SSeth Jennings 	/* allocate entry */
631*2b281117SSeth Jennings 	entry = zswap_entry_cache_alloc(GFP_KERNEL);
632*2b281117SSeth Jennings 	if (!entry) {
633*2b281117SSeth Jennings 		zswap_reject_kmemcache_fail++;
634*2b281117SSeth Jennings 		ret = -ENOMEM;
635*2b281117SSeth Jennings 		goto reject;
636*2b281117SSeth Jennings 	}
637*2b281117SSeth Jennings 
638*2b281117SSeth Jennings 	/* compress */
639*2b281117SSeth Jennings 	dst = get_cpu_var(zswap_dstmem);
640*2b281117SSeth Jennings 	src = kmap_atomic(page);
641*2b281117SSeth Jennings 	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
642*2b281117SSeth Jennings 	kunmap_atomic(src);
643*2b281117SSeth Jennings 	if (ret) {
644*2b281117SSeth Jennings 		ret = -EINVAL;
645*2b281117SSeth Jennings 		goto freepage;
646*2b281117SSeth Jennings 	}
647*2b281117SSeth Jennings 
648*2b281117SSeth Jennings 	/* store */
649*2b281117SSeth Jennings 	len = dlen + sizeof(struct zswap_header);
650*2b281117SSeth Jennings 	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
651*2b281117SSeth Jennings 		&handle);
652*2b281117SSeth Jennings 	if (ret == -ENOSPC) {
653*2b281117SSeth Jennings 		zswap_reject_compress_poor++;
654*2b281117SSeth Jennings 		goto freepage;
655*2b281117SSeth Jennings 	}
656*2b281117SSeth Jennings 	if (ret) {
657*2b281117SSeth Jennings 		zswap_reject_alloc_fail++;
658*2b281117SSeth Jennings 		goto freepage;
659*2b281117SSeth Jennings 	}
660*2b281117SSeth Jennings 	zhdr = zbud_map(tree->pool, handle);
661*2b281117SSeth Jennings 	zhdr->swpentry = swp_entry(type, offset);
662*2b281117SSeth Jennings 	buf = (u8 *)(zhdr + 1);
663*2b281117SSeth Jennings 	memcpy(buf, dst, dlen);
664*2b281117SSeth Jennings 	zbud_unmap(tree->pool, handle);
665*2b281117SSeth Jennings 	put_cpu_var(zswap_dstmem);
666*2b281117SSeth Jennings 
667*2b281117SSeth Jennings 	/* populate entry */
668*2b281117SSeth Jennings 	entry->offset = offset;
669*2b281117SSeth Jennings 	entry->handle = handle;
670*2b281117SSeth Jennings 	entry->length = dlen;
671*2b281117SSeth Jennings 
672*2b281117SSeth Jennings 	/* map */
673*2b281117SSeth Jennings 	spin_lock(&tree->lock);
674*2b281117SSeth Jennings 	do {
675*2b281117SSeth Jennings 		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
676*2b281117SSeth Jennings 		if (ret == -EEXIST) {
677*2b281117SSeth Jennings 			zswap_duplicate_entry++;
678*2b281117SSeth Jennings 			/* remove from rbtree */
679*2b281117SSeth Jennings 			rb_erase(&dupentry->rbnode, &tree->rbroot);
680*2b281117SSeth Jennings 			if (!zswap_entry_put(dupentry)) {
681*2b281117SSeth Jennings 				/* free */
682*2b281117SSeth Jennings 				zswap_free_entry(tree, dupentry);
683*2b281117SSeth Jennings 			}
684*2b281117SSeth Jennings 		}
685*2b281117SSeth Jennings 	} while (ret == -EEXIST);
686*2b281117SSeth Jennings 	spin_unlock(&tree->lock);
687*2b281117SSeth Jennings 
688*2b281117SSeth Jennings 	/* update stats */
689*2b281117SSeth Jennings 	atomic_inc(&zswap_stored_pages);
690*2b281117SSeth Jennings 	zswap_pool_pages = zbud_get_pool_size(tree->pool);
691*2b281117SSeth Jennings 
692*2b281117SSeth Jennings 	return 0;
693*2b281117SSeth Jennings 
694*2b281117SSeth Jennings freepage:
695*2b281117SSeth Jennings 	put_cpu_var(zswap_dstmem);
696*2b281117SSeth Jennings 	zswap_entry_cache_free(entry);
697*2b281117SSeth Jennings reject:
698*2b281117SSeth Jennings 	return ret;
699*2b281117SSeth Jennings }
700*2b281117SSeth Jennings 
701*2b281117SSeth Jennings /*
702*2b281117SSeth Jennings  * returns 0 if the page was successfully decompressed
 703*2b281117SSeth Jennings  * returns -1 if the entry is not found or on error
704*2b281117SSeth Jennings */
705*2b281117SSeth Jennings static int zswap_frontswap_load(unsigned type, pgoff_t offset,
706*2b281117SSeth Jennings 				struct page *page)
707*2b281117SSeth Jennings {
708*2b281117SSeth Jennings 	struct zswap_tree *tree = zswap_trees[type];
709*2b281117SSeth Jennings 	struct zswap_entry *entry;
710*2b281117SSeth Jennings 	u8 *src, *dst;
711*2b281117SSeth Jennings 	unsigned int dlen;
712*2b281117SSeth Jennings 	int refcount, ret;
713*2b281117SSeth Jennings 
714*2b281117SSeth Jennings 	/* find */
715*2b281117SSeth Jennings 	spin_lock(&tree->lock);
716*2b281117SSeth Jennings 	entry = zswap_rb_search(&tree->rbroot, offset);
717*2b281117SSeth Jennings 	if (!entry) {
718*2b281117SSeth Jennings 		/* entry was written back */
719*2b281117SSeth Jennings 		spin_unlock(&tree->lock);
720*2b281117SSeth Jennings 		return -1;
721*2b281117SSeth Jennings 	}
722*2b281117SSeth Jennings 	zswap_entry_get(entry);
723*2b281117SSeth Jennings 	spin_unlock(&tree->lock);
724*2b281117SSeth Jennings 
725*2b281117SSeth Jennings 	/* decompress */
726*2b281117SSeth Jennings 	dlen = PAGE_SIZE;
727*2b281117SSeth Jennings 	src = (u8 *)zbud_map(tree->pool, entry->handle) +
728*2b281117SSeth Jennings 			sizeof(struct zswap_header);
729*2b281117SSeth Jennings 	dst = kmap_atomic(page);
730*2b281117SSeth Jennings 	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
731*2b281117SSeth Jennings 		dst, &dlen);
732*2b281117SSeth Jennings 	kunmap_atomic(dst);
733*2b281117SSeth Jennings 	zbud_unmap(tree->pool, entry->handle);
734*2b281117SSeth Jennings 	BUG_ON(ret);
735*2b281117SSeth Jennings 
736*2b281117SSeth Jennings 	spin_lock(&tree->lock);
737*2b281117SSeth Jennings 	refcount = zswap_entry_put(entry);
738*2b281117SSeth Jennings 	if (likely(refcount)) {
739*2b281117SSeth Jennings 		spin_unlock(&tree->lock);
740*2b281117SSeth Jennings 		return 0;
741*2b281117SSeth Jennings 	}
742*2b281117SSeth Jennings 	spin_unlock(&tree->lock);
743*2b281117SSeth Jennings 
744*2b281117SSeth Jennings 	/*
745*2b281117SSeth Jennings 	 * We don't have to unlink from the rbtree because
 746*2b281117SSeth Jennings 	 * zswap_writeback_entry() or zswap_frontswap_invalidate_page()
747*2b281117SSeth Jennings 	 * has already done this for us if we are the last reference.
748*2b281117SSeth Jennings 	 */
749*2b281117SSeth Jennings 	/* free */
750*2b281117SSeth Jennings 
751*2b281117SSeth Jennings 	zswap_free_entry(tree, entry);
752*2b281117SSeth Jennings 
753*2b281117SSeth Jennings 	return 0;
754*2b281117SSeth Jennings }
755*2b281117SSeth Jennings 
756*2b281117SSeth Jennings /* frees an entry in zswap */
757*2b281117SSeth Jennings static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
758*2b281117SSeth Jennings {
759*2b281117SSeth Jennings 	struct zswap_tree *tree = zswap_trees[type];
760*2b281117SSeth Jennings 	struct zswap_entry *entry;
761*2b281117SSeth Jennings 	int refcount;
762*2b281117SSeth Jennings 
763*2b281117SSeth Jennings 	/* find */
764*2b281117SSeth Jennings 	spin_lock(&tree->lock);
765*2b281117SSeth Jennings 	entry = zswap_rb_search(&tree->rbroot, offset);
766*2b281117SSeth Jennings 	if (!entry) {
767*2b281117SSeth Jennings 		/* entry was written back */
768*2b281117SSeth Jennings 		spin_unlock(&tree->lock);
769*2b281117SSeth Jennings 		return;
770*2b281117SSeth Jennings 	}
771*2b281117SSeth Jennings 
772*2b281117SSeth Jennings 	/* remove from rbtree */
773*2b281117SSeth Jennings 	rb_erase(&entry->rbnode, &tree->rbroot);
774*2b281117SSeth Jennings 
775*2b281117SSeth Jennings 	/* drop the initial reference from entry creation */
776*2b281117SSeth Jennings 	refcount = zswap_entry_put(entry);
777*2b281117SSeth Jennings 
778*2b281117SSeth Jennings 	spin_unlock(&tree->lock);
779*2b281117SSeth Jennings 
780*2b281117SSeth Jennings 	if (refcount) {
781*2b281117SSeth Jennings 		/* writeback in progress, writeback will free */
782*2b281117SSeth Jennings 		return;
783*2b281117SSeth Jennings 	}
784*2b281117SSeth Jennings 
785*2b281117SSeth Jennings 	/* free */
786*2b281117SSeth Jennings 	zswap_free_entry(tree, entry);
787*2b281117SSeth Jennings }
788*2b281117SSeth Jennings 
789*2b281117SSeth Jennings /* frees all zswap entries for the given swap type */
790*2b281117SSeth Jennings static void zswap_frontswap_invalidate_area(unsigned type)
791*2b281117SSeth Jennings {
792*2b281117SSeth Jennings 	struct zswap_tree *tree = zswap_trees[type];
793*2b281117SSeth Jennings 	struct rb_node *node;
794*2b281117SSeth Jennings 	struct zswap_entry *entry;
795*2b281117SSeth Jennings 
796*2b281117SSeth Jennings 	if (!tree)
797*2b281117SSeth Jennings 		return;
798*2b281117SSeth Jennings 
799*2b281117SSeth Jennings 	/* walk the tree and free everything */
800*2b281117SSeth Jennings 	spin_lock(&tree->lock);
801*2b281117SSeth Jennings 	/*
802*2b281117SSeth Jennings 	 * TODO: Even though this code should not be executed because
803*2b281117SSeth Jennings 	 * the try_to_unuse() in swapoff should have emptied the tree,
804*2b281117SSeth Jennings 	 * it is very wasteful to rebalance the tree after every
805*2b281117SSeth Jennings 	 * removal when we are freeing the whole tree.
806*2b281117SSeth Jennings 	 *
807*2b281117SSeth Jennings 	 * If post-order traversal code is ever added to the rbtree
808*2b281117SSeth Jennings 	 * implementation, it should be used here.
809*2b281117SSeth Jennings 	 */
810*2b281117SSeth Jennings 	while ((node = rb_first(&tree->rbroot))) {
811*2b281117SSeth Jennings 		entry = rb_entry(node, struct zswap_entry, rbnode);
812*2b281117SSeth Jennings 		rb_erase(&entry->rbnode, &tree->rbroot);
813*2b281117SSeth Jennings 		zbud_free(tree->pool, entry->handle);
814*2b281117SSeth Jennings 		zswap_entry_cache_free(entry);
815*2b281117SSeth Jennings 		atomic_dec(&zswap_stored_pages);
816*2b281117SSeth Jennings 	}
817*2b281117SSeth Jennings 	tree->rbroot = RB_ROOT;
818*2b281117SSeth Jennings 	spin_unlock(&tree->lock);
819*2b281117SSeth Jennings }
820*2b281117SSeth Jennings 
821*2b281117SSeth Jennings static struct zbud_ops zswap_zbud_ops = {
822*2b281117SSeth Jennings 	.evict = zswap_writeback_entry
823*2b281117SSeth Jennings };
824*2b281117SSeth Jennings 
825*2b281117SSeth Jennings static void zswap_frontswap_init(unsigned type)
826*2b281117SSeth Jennings {
827*2b281117SSeth Jennings 	struct zswap_tree *tree;
828*2b281117SSeth Jennings 
829*2b281117SSeth Jennings 	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
830*2b281117SSeth Jennings 	if (!tree)
831*2b281117SSeth Jennings 		goto err;
832*2b281117SSeth Jennings 	tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
833*2b281117SSeth Jennings 	if (!tree->pool)
834*2b281117SSeth Jennings 		goto freetree;
835*2b281117SSeth Jennings 	tree->rbroot = RB_ROOT;
836*2b281117SSeth Jennings 	spin_lock_init(&tree->lock);
837*2b281117SSeth Jennings 	zswap_trees[type] = tree;
838*2b281117SSeth Jennings 	return;
839*2b281117SSeth Jennings 
840*2b281117SSeth Jennings freetree:
841*2b281117SSeth Jennings 	kfree(tree);
842*2b281117SSeth Jennings err:
843*2b281117SSeth Jennings 	pr_err("alloc failed, zswap disabled for swap type %d\n", type);
844*2b281117SSeth Jennings }
845*2b281117SSeth Jennings 
846*2b281117SSeth Jennings static struct frontswap_ops zswap_frontswap_ops = {
847*2b281117SSeth Jennings 	.store = zswap_frontswap_store,
848*2b281117SSeth Jennings 	.load = zswap_frontswap_load,
849*2b281117SSeth Jennings 	.invalidate_page = zswap_frontswap_invalidate_page,
850*2b281117SSeth Jennings 	.invalidate_area = zswap_frontswap_invalidate_area,
851*2b281117SSeth Jennings 	.init = zswap_frontswap_init
852*2b281117SSeth Jennings };
853*2b281117SSeth Jennings 
854*2b281117SSeth Jennings /*********************************
855*2b281117SSeth Jennings * debugfs functions
856*2b281117SSeth Jennings **********************************/
857*2b281117SSeth Jennings #ifdef CONFIG_DEBUG_FS
858*2b281117SSeth Jennings #include <linux/debugfs.h>
859*2b281117SSeth Jennings 
860*2b281117SSeth Jennings static struct dentry *zswap_debugfs_root;
861*2b281117SSeth Jennings 
862*2b281117SSeth Jennings static int __init zswap_debugfs_init(void)
863*2b281117SSeth Jennings {
864*2b281117SSeth Jennings 	if (!debugfs_initialized())
865*2b281117SSeth Jennings 		return -ENODEV;
866*2b281117SSeth Jennings 
867*2b281117SSeth Jennings 	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
868*2b281117SSeth Jennings 	if (!zswap_debugfs_root)
869*2b281117SSeth Jennings 		return -ENOMEM;
870*2b281117SSeth Jennings 
871*2b281117SSeth Jennings 	debugfs_create_u64("pool_limit_hit", S_IRUGO,
872*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_pool_limit_hit);
873*2b281117SSeth Jennings 	debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
874*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_reject_reclaim_fail);
875*2b281117SSeth Jennings 	debugfs_create_u64("reject_alloc_fail", S_IRUGO,
876*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_reject_alloc_fail);
877*2b281117SSeth Jennings 	debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
878*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_reject_kmemcache_fail);
879*2b281117SSeth Jennings 	debugfs_create_u64("reject_compress_poor", S_IRUGO,
880*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_reject_compress_poor);
881*2b281117SSeth Jennings 	debugfs_create_u64("written_back_pages", S_IRUGO,
882*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_written_back_pages);
883*2b281117SSeth Jennings 	debugfs_create_u64("duplicate_entry", S_IRUGO,
884*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_duplicate_entry);
885*2b281117SSeth Jennings 	debugfs_create_u64("pool_pages", S_IRUGO,
886*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_pool_pages);
887*2b281117SSeth Jennings 	debugfs_create_atomic_t("stored_pages", S_IRUGO,
888*2b281117SSeth Jennings 			zswap_debugfs_root, &zswap_stored_pages);
889*2b281117SSeth Jennings 
890*2b281117SSeth Jennings 	return 0;
891*2b281117SSeth Jennings }
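
/*
 * Illustrative usage (assumes debugfs is mounted at /sys/kernel/debug):
 * each counter created above appears as a read-only file, e.g.
 * /sys/kernel/debug/zswap/pool_pages, and can simply be read from
 * userspace to monitor compression behaviour at runtime.
 */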
892*2b281117SSeth Jennings 
893*2b281117SSeth Jennings static void __exit zswap_debugfs_exit(void)
894*2b281117SSeth Jennings {
895*2b281117SSeth Jennings 	debugfs_remove_recursive(zswap_debugfs_root);
896*2b281117SSeth Jennings }
897*2b281117SSeth Jennings #else
898*2b281117SSeth Jennings static int __init zswap_debugfs_init(void)
899*2b281117SSeth Jennings {
900*2b281117SSeth Jennings 	return 0;
901*2b281117SSeth Jennings }
902*2b281117SSeth Jennings 
903*2b281117SSeth Jennings static void __exit zswap_debugfs_exit(void) { }
904*2b281117SSeth Jennings #endif
905*2b281117SSeth Jennings 
906*2b281117SSeth Jennings /*********************************
907*2b281117SSeth Jennings * module init and exit
908*2b281117SSeth Jennings **********************************/
909*2b281117SSeth Jennings static int __init init_zswap(void)
910*2b281117SSeth Jennings {
911*2b281117SSeth Jennings 	if (!zswap_enabled)
912*2b281117SSeth Jennings 		return 0;
913*2b281117SSeth Jennings 
914*2b281117SSeth Jennings 	pr_info("loading zswap\n");
915*2b281117SSeth Jennings 	if (zswap_entry_cache_create()) {
916*2b281117SSeth Jennings 		pr_err("entry cache creation failed\n");
917*2b281117SSeth Jennings 		goto error;
918*2b281117SSeth Jennings 	}
919*2b281117SSeth Jennings 	if (zswap_comp_init()) {
920*2b281117SSeth Jennings 		pr_err("compressor initialization failed\n");
921*2b281117SSeth Jennings 		goto compfail;
922*2b281117SSeth Jennings 	}
923*2b281117SSeth Jennings 	if (zswap_cpu_init()) {
924*2b281117SSeth Jennings 		pr_err("per-cpu initialization failed\n");
925*2b281117SSeth Jennings 		goto pcpufail;
926*2b281117SSeth Jennings 	}
927*2b281117SSeth Jennings 	frontswap_register_ops(&zswap_frontswap_ops);
928*2b281117SSeth Jennings 	if (zswap_debugfs_init())
929*2b281117SSeth Jennings 		pr_warn("debugfs initialization failed\n");
930*2b281117SSeth Jennings 	return 0;
931*2b281117SSeth Jennings pcpufail:
932*2b281117SSeth Jennings 	zswap_comp_exit();
933*2b281117SSeth Jennings compfail:
 934*2b281117SSeth Jennings 	zswap_entry_cache_destroy();
935*2b281117SSeth Jennings error:
936*2b281117SSeth Jennings 	return -ENOMEM;
937*2b281117SSeth Jennings }
938*2b281117SSeth Jennings /* must be late so crypto has time to come up */
939*2b281117SSeth Jennings late_initcall(init_zswap);
940*2b281117SSeth Jennings 
941*2b281117SSeth Jennings MODULE_LICENSE("GPL");
942*2b281117SSeth Jennings MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
943*2b281117SSeth Jennings MODULE_DESCRIPTION("Compressed cache for swap pages");
944