/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/mempool.h>
#include <linux/zbud.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>

/*********************************
* statistics
**********************************/
/* Number of memory pages used by the compressed pool */
static u64 zswap_pool_pages;
/* The number of compressed pages currently stored in zswap */
static atomic_t zswap_stored_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/*********************************
* tunables
**********************************/
/* Enable/disable zswap (disabled by default, fixed at boot for now) */
static bool zswap_enabled __read_mostly;
module_param_named(enabled, zswap_enabled, bool, 0);

/* Compressor to be used by zswap (fixed at boot for now) */
#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
module_param_named(compressor, zswap_compressor, charp, 0);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent,
		zswap_max_pool_percent, uint, 0644);
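
/*
 * Example (illustrative only): since these are module parameters and
 * KBUILD_MODNAME is "zswap", they can be set on the kernel command line
 * when zswap is built in, e.g.:
 *
 *	zswap.enabled=1 zswap.compressor=lzo zswap.max_pool_percent=20
 *
 * max_pool_percent (mode 0644) can also be changed at runtime via
 * /sys/module/zswap/parameters/max_pool_percent.
 */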

/*********************************
* compression functions
**********************************/
/* per-cpu compression transforms */
static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;

enum comp_op {
	ZSWAP_COMPOP_COMPRESS,
	ZSWAP_COMPOP_DECOMPRESS
};

static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
				u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret;

	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
	switch (op) {
	case ZSWAP_COMPOP_COMPRESS:
		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
		break;
	case ZSWAP_COMPOP_DECOMPRESS:
		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
		break;
	default:
		ret = -EINVAL;
	}

	put_cpu();
	return ret;
}

static int __init zswap_comp_init(void)
{
	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
		pr_info("%s compressor not available\n", zswap_compressor);
		/* fall back to default compressor */
		zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
		if (!crypto_has_comp(zswap_compressor, 0, 0))
			/* can't even load the default compressor */
			return -ENODEV;
	}
	pr_info("using %s compressor\n", zswap_compressor);

	/* alloc percpu transforms */
	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
	if (!zswap_comp_pcpu_tfms)
		return -ENOMEM;
	return 0;
}

static void zswap_comp_exit(void)
{
	/* free percpu transforms */
	if (zswap_comp_pcpu_tfms)
		free_percpu(zswap_comp_pcpu_tfms);
}
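
/*
 * Illustrative sketch (not part of the driver): compressing one page into
 * the per-cpu destination buffer set up in the per-cpu code below.  This
 * mirrors what zswap_frontswap_store() does; 'src' is assumed to be a
 * kmap'd source page.
 *
 *	unsigned int dlen = PAGE_SIZE;
 *	u8 *dst = get_cpu_var(zswap_dstmem);
 *	int err = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE,
 *				dst, &dlen);
 *	...
 *	put_cpu_var(zswap_dstmem);
 */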

/*********************************
* data structures
**********************************/
/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * refcount - the number of outstanding references to the entry. This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * handle - zbud allocation handle that stores the compressed page data
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	unsigned long handle;
};

struct zswap_header {
	swp_entry_t swpentry;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
	struct zbud_pool *pool;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static int zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	return (zswap_entry_cache == NULL);
}

static void zswap_entry_cache_destroy(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/* caller must hold the tree lock; returns the new refcount */
static int zswap_entry_put(struct zswap_entry *entry)
{
	entry->refcount--;
	return entry->refcount;
}
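
/*
 * Illustrative sketch (not part of the driver) of the reference pattern
 * used throughout this file: take a reference under the tree lock before
 * touching an entry outside it, then drop the reference and free on zero.
 *
 *	spin_lock(&tree->lock);
 *	entry = zswap_rb_search(&tree->rbroot, offset);
 *	if (entry)
 *		zswap_entry_get(entry);
 *	spin_unlock(&tree->lock);
 *	... use entry->handle/entry->length without the lock held ...
 *	spin_lock(&tree->lock);
 *	refcount = zswap_entry_put(entry);
 *	spin_unlock(&tree->lock);
 *	if (!refcount)
 *		zswap_free_entry(tree, entry);
 *
 * When the count reaches zero here, the entry has already been unlinked
 * from the rbtree by the writeback or invalidate path that dropped the
 * other reference.
 */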

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);

static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
{
	struct crypto_comp *tfm;
	u8 *dst;

	switch (action) {
	case CPU_UP_PREPARE:
		tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
		if (IS_ERR(tfm)) {
			pr_err("can't allocate compressor transform\n");
			return NOTIFY_BAD;
		}
		*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
		dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
		if (!dst) {
			pr_err("can't allocate compressor buffer\n");
			crypto_free_comp(tfm);
			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
			return NOTIFY_BAD;
		}
		per_cpu(zswap_dstmem, cpu) = dst;
		break;
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
		if (tfm) {
			crypto_free_comp(tfm);
			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
		}
		dst = per_cpu(zswap_dstmem, cpu);
		kfree(dst);
		per_cpu(zswap_dstmem, cpu) = NULL;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static int zswap_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	unsigned long cpu = (unsigned long)pcpu;
	return __zswap_cpu_notifier(action, cpu);
}

static struct notifier_block zswap_cpu_notifier_block = {
	.notifier_call = zswap_cpu_notifier
};

static int zswap_cpu_init(void)
{
	unsigned long cpu;

	get_online_cpus();
	for_each_online_cpu(cpu)
		if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
			goto cleanup;
	register_cpu_notifier(&zswap_cpu_notifier_block);
	put_online_cpus();
	return 0;

cleanup:
	for_each_online_cpu(cpu)
		__zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
	put_online_cpus();
	return -ENOMEM;
}

/*********************************
* helpers
**********************************/
static bool zswap_is_full(void)
{
	return (totalram_pages * zswap_max_pool_percent / 100 <
		zswap_pool_pages);
}
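
/*
 * Worked example (illustrative only): on a machine with 4GiB of RAM and
 * 4KiB pages, totalram_pages is roughly 1048576, so with the default
 * zswap_max_pool_percent of 20 the pool is considered full once
 * zswap_pool_pages exceeds about 209715 pages (~819MiB of pool memory).
 */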

/*
 * Carries out the common pattern of freeing an entry's zbud allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_tree *tree, struct zswap_entry *entry)
{
	zbud_free(tree->pool, entry->handle);
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_pool_pages = zbud_get_pool_size(tree->pool);
}

/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_NOMEM
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage.
 * Returns ZSWAP_SWAPCACHE_EXIST if the page was already in the swap cache;
 * the page is not locked.
 * Returns ZSWAP_SWAPCACHE_NEW if a new page was added to the swap cache and
 * needs to be populated; the page is locked.
 * Returns ZSWAP_SWAPCACHE_NOMEM on error.
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = &swapper_spaces[swp_type(entry)];
	int err;

	*retpage = NULL;
	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page(GFP_KERNEL);
			if (!new_page)
				break; /* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			*retpage = new_page;
			return ZSWAP_SWAPCACHE_NEW;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	if (!found_page)
		return ZSWAP_SWAPCACHE_NOMEM;
	*retpage = found_page;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted by the frontswap_store()
 * in the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	u8 *src, *dst;
	unsigned int dlen;
	int ret, refcount;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* extract swpentry from data */
	zhdr = zbud_map(pool, handle);
	swpentry = zhdr->swpentry; /* here */
	zbud_unmap(pool, handle);
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);
	BUG_ON(pool != tree->pool);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		return 0;
	}
	zswap_entry_get(entry);
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_NOMEM: /* no memory */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST: /* page is unlocked */
		/* page is already in the swap cache, ignore for now */
		page_cache_release(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		dlen = PAGE_SIZE;
		src = (u8 *)zbud_map(tree->pool, entry->handle) +
			sizeof(struct zswap_header);
		dst = kmap_atomic(page);
		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
				entry->length, dst, &dlen);
		kunmap_atomic(dst);
		zbud_unmap(tree->pool, entry->handle);
		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	page_cache_release(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);

	/* drop local reference */
	zswap_entry_put(entry);
	/* drop the initial reference from entry creation */
	refcount = zswap_entry_put(entry);

	/*
	 * There are three possible values for refcount here:
	 * (1) refcount is 1, load is in progress; unlink from rbtree,
	 *     load will free
	 * (2) refcount is 0 (normal case); entry is valid,
	 *     remove from rbtree and free entry
	 * (3) refcount is -1, invalidate happened during writeback;
	 *     free entry
	 */
	if (refcount >= 0) {
		/* no invalidate yet, remove from rbtree */
		rb_erase(&entry->rbnode, &tree->rbroot);
	}
	spin_unlock(&tree->lock);
	if (refcount <= 0) {
		/* free the entry */
		zswap_free_entry(tree, entry);
		return 0;
	}
	return -EAGAIN;

fail:
	spin_lock(&tree->lock);
	zswap_entry_put(entry);
	spin_unlock(&tree->lock);
	return ret;
}

/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	int ret;
	unsigned int dlen = PAGE_SIZE, len;
	unsigned long handle;
	char *buf;
	u8 *src, *dst;
	struct zswap_header *zhdr;

	if (!tree) {
		ret = -ENODEV;
		goto reject;
	}

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		if (zbud_reclaim_page(tree->pool, 8)) {
			zswap_reject_reclaim_fail++;
			ret = -ENOMEM;
			goto reject;
		}
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	/* compress */
	dst = get_cpu_var(zswap_dstmem);
	src = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
	kunmap_atomic(src);
	if (ret) {
		ret = -EINVAL;
		goto freepage;
	}

	/* store */
	len = dlen + sizeof(struct zswap_header);
	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
		&handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto freepage;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto freepage;
	}
	zhdr = zbud_map(tree->pool, handle);
	zhdr->swpentry = swp_entry(type, offset);
	buf = (u8 *)(zhdr + 1);
	memcpy(buf, dst, dlen);
	zbud_unmap(tree->pool, handle);
	put_cpu_var(zswap_dstmem);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			rb_erase(&dupentry->rbnode, &tree->rbroot);
			if (!zswap_entry_put(dupentry)) {
				/* free */
				zswap_free_entry(tree, dupentry);
			}
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_pool_pages = zbud_get_pool_size(tree->pool);

	return 0;

freepage:
	put_cpu_var(zswap_dstmem);
	zswap_entry_cache_free(entry);
reject:
	return ret;
}
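
/*
 * Illustrative summary (not code in this file): a successful round trip
 * through the hooks above and below looks like
 *
 *	swap out: frontswap_store() -> zswap_frontswap_store()
 *	          compress page -> zbud_alloc()/memcpy -> rbtree insert
 *	swap in:  frontswap_load() -> zswap_frontswap_load()
 *	          rbtree lookup -> decompress into the faulting page
 *
 * If the store is rejected (pool full, poor compression, allocation
 * failure), frontswap falls back to writing the page to the real swap
 * device.
 */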

/*
 * returns 0 if the page was successfully decompressed
 * returns -1 on entry not found or error
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	u8 *src, *dst;
	unsigned int dlen;
	int refcount, ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	zswap_entry_get(entry);
	spin_unlock(&tree->lock);

	/* decompress */
	dlen = PAGE_SIZE;
	src = (u8 *)zbud_map(tree->pool, entry->handle) +
		sizeof(struct zswap_header);
	dst = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
		dst, &dlen);
	kunmap_atomic(dst);
	zbud_unmap(tree->pool, entry->handle);
	BUG_ON(ret);

	spin_lock(&tree->lock);
	refcount = zswap_entry_put(entry);
	if (likely(refcount)) {
		spin_unlock(&tree->lock);
		return 0;
	}
	spin_unlock(&tree->lock);

	/*
	 * We don't have to unlink from the rbtree because
	 * zswap_writeback_entry() or zswap_frontswap_invalidate_page()
	 * has already done this for us if we are the last reference.
	 */
	/* free */

	zswap_free_entry(tree, entry);

	return 0;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	int refcount;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	rb_erase(&entry->rbnode, &tree->rbroot);

	/* drop the initial reference from entry creation */
	refcount = zswap_entry_put(entry);

	spin_unlock(&tree->lock);

	if (refcount) {
		/* writeback in progress, writeback will free */
		return;
	}

	/* free */
	zswap_free_entry(tree, entry);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct rb_node *node;
	struct zswap_entry *entry;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	/*
	 * TODO: Even though this code should not be executed because
	 * the try_to_unuse() in swapoff should have emptied the tree,
	 * it is very wasteful to rebalance the tree after every
	 * removal when we are freeing the whole tree.
	 *
	 * If post-order traversal code is ever added to the rbtree
	 * implementation, it should be used here.
	 */
	while ((node = rb_first(&tree->rbroot))) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		rb_erase(&entry->rbnode, &tree->rbroot);
		zbud_free(tree->pool, entry->handle);
		zswap_entry_cache_free(entry);
		atomic_dec(&zswap_stored_pages);
	}
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
}

static struct zbud_ops zswap_zbud_ops = {
	.evict = zswap_writeback_entry
};

static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
	if (!tree)
		goto err;
	tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
	if (!tree->pool)
		goto freetree;
	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
	return;

freetree:
	kfree(tree);
err:
	pr_err("alloc failed, zswap disabled for swap type %d\n", type);
}

static struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
	if (!zswap_debugfs_root)
		return -ENOMEM;

	debugfs_create_u64("pool_limit_hit", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", S_IRUGO,
			zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_pages);
	debugfs_create_atomic_t("stored_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_stored_pages);

	return 0;
}

static void __exit zswap_debugfs_exit(void)
{
	debugfs_remove_recursive(zswap_debugfs_root);
}
#else
static int __init zswap_debugfs_init(void)
{
	return 0;
}

static void __exit zswap_debugfs_exit(void) { }
#endif
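
/*
 * Example (illustrative only): with debugfs mounted at the usual
 * /sys/kernel/debug, the statistics created above can be read with e.g.
 *
 *	cat /sys/kernel/debug/zswap/pool_pages
 *	cat /sys/kernel/debug/zswap/stored_pages
 */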

/*********************************
* module init and exit
**********************************/
static int __init init_zswap(void)
{
	if (!zswap_enabled)
		return 0;

	pr_info("loading zswap\n");
	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto error;
	}
	if (zswap_comp_init()) {
		pr_err("compressor initialization failed\n");
		goto compfail;
	}
	if (zswap_cpu_init()) {
		pr_err("per-cpu initialization failed\n");
		goto pcpufail;
	}
	frontswap_register_ops(&zswap_frontswap_ops);
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;
pcpufail:
	zswap_comp_exit();
compfail:
	zswap_entry_cache_destroy();
error:
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("Compressed cache for swap pages");