1 /*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/highmem.h>
26 #include <linux/slab.h>
27 #include <linux/backing-dev.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32 #include <linux/sysfs.h>
33 #include <linux/debugfs.h>
34 #include <linux/cpuhotplug.h>
35 #include <linux/part_stat.h>
36 #include <linux/kernel_read_file.h>
37
38 #include "zram_drv.h"
39
40 static DEFINE_IDR(zram_index_idr);
41 /* idr index must be protected */
42 static DEFINE_MUTEX(zram_index_mutex);
43
44 static int zram_major;
45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
46
47 #define ZRAM_MAX_ALGO_NAME_SZ 128
48
49 /* Module params (documentation at end) */
50 static unsigned int num_devices = 1;
51 /*
52 * Pages that compress to sizes equal to or greater than this are stored
53 * uncompressed in memory.
54 */
55 static size_t huge_class_size;
56
57 static const struct block_device_operations zram_devops;
58
59 static void zram_free_page(struct zram *zram, size_t index);
60 static int zram_read_from_zspool(struct zram *zram, struct page *page,
61 u32 index);
62
63 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)
64
65 static void zram_slot_lock_init(struct zram *zram, u32 index)
66 {
67 static struct lock_class_key __key;
68
69 lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
70 &__key, 0);
71 }
72
73 /*
74 * entry locking rules:
75 *
76 * 1) Lock is exclusive
77 *
78 * 2) lock() function can sleep waiting for the lock
79 *
80 * 3) Lock owner can sleep
81 *
82 * 4) Use TRY lock variant when in atomic context
83 * - must check the return value and handle locking failures
84 */
85 static __must_check bool zram_slot_trylock(struct zram *zram, u32 index)
86 {
87 unsigned long *lock = &zram->table[index].flags;
88
89 if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
90 mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
91 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
92 return true;
93 }
94
95 return false;
96 }
97
98 static void zram_slot_lock(struct zram *zram, u32 index)
99 {
100 unsigned long *lock = &zram->table[index].flags;
101
102 mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
103 wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
104 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
105 }
106
107 static void zram_slot_unlock(struct zram *zram, u32 index)
108 {
109 unsigned long *lock = &zram->table[index].flags;
110
111 mutex_release(slot_dep_map(zram, index), _RET_IP_);
112 clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
113 }
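
/*
 * Illustrative usage sketch (not part of the driver): per the locking rules
 * above, sleepable contexts take the lock directly, while atomic contexts
 * must use the trylock variant and handle failure. All helpers used here
 * are defined in this file.
 *
 *	zram_slot_lock(zram, index);
 *	... read or modify zram->table[index] ...
 *	zram_slot_unlock(zram, index);
 *
 *	if (zram_slot_trylock(zram, index)) {
 *		... read or modify zram->table[index] ...
 *		zram_slot_unlock(zram, index);
 *	}
 */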
114
115 static inline bool init_done(struct zram *zram)
116 {
117 return zram->disksize;
118 }
119
120 static inline struct zram *dev_to_zram(struct device *dev)
121 {
122 return (struct zram *)dev_to_disk(dev)->private_data;
123 }
124
125 static unsigned long zram_get_handle(struct zram *zram, u32 index)
126 {
127 return zram->table[index].handle;
128 }
129
130 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
131 {
132 zram->table[index].handle = handle;
133 }
134
135 static bool zram_test_flag(struct zram *zram, u32 index,
136 enum zram_pageflags flag)
137 {
138 return zram->table[index].flags & BIT(flag);
139 }
140
141 static void zram_set_flag(struct zram *zram, u32 index,
142 enum zram_pageflags flag)
143 {
144 zram->table[index].flags |= BIT(flag);
145 }
146
147 static void zram_clear_flag(struct zram *zram, u32 index,
148 enum zram_pageflags flag)
149 {
150 zram->table[index].flags &= ~BIT(flag);
151 }
152
153 static size_t zram_get_obj_size(struct zram *zram, u32 index)
154 {
155 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
156 }
157
158 static void zram_set_obj_size(struct zram *zram,
159 u32 index, size_t size)
160 {
161 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
162
163 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
164 }
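
/*
 * Layout note with a small example (illustrative): bits below ZRAM_FLAG_SHIFT
 * of table[index].flags hold the compressed object size, while the bits at
 * and above ZRAM_FLAG_SHIFT hold zram_pageflags such as ZRAM_SAME or ZRAM_WB
 * (and, higher still, the compression priority). zram_set_obj_size()
 * therefore preserves the flag bits and overwrites only the size bits:
 *
 *	zram_set_obj_size(zram, index, comp_len);
 *	zram_get_obj_size(zram, index);		returns comp_len
 */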
165
166 static inline bool zram_allocated(struct zram *zram, u32 index)
167 {
168 return zram_get_obj_size(zram, index) ||
169 zram_test_flag(zram, index, ZRAM_SAME) ||
170 zram_test_flag(zram, index, ZRAM_WB);
171 }
172
173 static inline void update_used_max(struct zram *zram, const unsigned long pages)
174 {
175 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
176
177 do {
178 if (cur_max >= pages)
179 return;
180 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
181 &cur_max, pages));
182 }
183
184 static bool zram_can_store_page(struct zram *zram)
185 {
186 unsigned long alloced_pages;
187
188 alloced_pages = zs_get_total_pages(zram->mem_pool);
189 update_used_max(zram, alloced_pages);
190
191 return !zram->limit_pages || alloced_pages <= zram->limit_pages;
192 }
193
194 #if PAGE_SIZE != 4096
195 static inline bool is_partial_io(struct bio_vec *bvec)
196 {
197 return bvec->bv_len != PAGE_SIZE;
198 }
199 #define ZRAM_PARTIAL_IO 1
200 #else
201 static inline bool is_partial_io(struct bio_vec *bvec)
202 {
203 return false;
204 }
205 #endif
206
207 static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
208 {
209 prio &= ZRAM_COMP_PRIORITY_MASK;
210 /*
211 * Clear the previous priority value first, in case we recompress
212 * an already recompressed page.
213 */
214 zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
215 ZRAM_COMP_PRIORITY_BIT1);
216 zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
217 }
218
219 static inline u32 zram_get_priority(struct zram *zram, u32 index)
220 {
221 u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
222
223 return prio & ZRAM_COMP_PRIORITY_MASK;
224 }
225
226 static void zram_accessed(struct zram *zram, u32 index)
227 {
228 zram_clear_flag(zram, index, ZRAM_IDLE);
229 zram_clear_flag(zram, index, ZRAM_PP_SLOT);
230 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
231 zram->table[index].ac_time = ktime_get_boottime();
232 #endif
233 }
234
235 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
236 struct zram_pp_slot {
237 unsigned long index;
238 struct list_head entry;
239 };
240
241 /*
242 * A post-processing bucket is, essentially, a size class; it defines
243 * the range (in bytes) of pp-slot sizes in a particular bucket.
244 */
245 #define PP_BUCKET_SIZE_RANGE 64
246 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)
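
/*
 * Worked example (illustrative): with PP_BUCKET_SIZE_RANGE == 64, a slot
 * whose compressed object size is 100 bytes goes into bucket 100 / 64 == 1,
 * while a PAGE_SIZE-sized (huge) object goes into the last bucket,
 * PAGE_SIZE / 64 == NUM_PP_BUCKETS - 1. See place_pp_slot() below.
 */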
247
248 struct zram_pp_ctl {
249 struct list_head pp_buckets[NUM_PP_BUCKETS];
250 };
251
252 static struct zram_pp_ctl *init_pp_ctl(void)
253 {
254 struct zram_pp_ctl *ctl;
255 u32 idx;
256
257 ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
258 if (!ctl)
259 return NULL;
260
261 for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
262 INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
263 return ctl;
264 }
265
266 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
267 {
268 list_del_init(&pps->entry);
269
270 zram_slot_lock(zram, pps->index);
271 zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
272 zram_slot_unlock(zram, pps->index);
273
274 kfree(pps);
275 }
276
277 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
278 {
279 u32 idx;
280
281 if (!ctl)
282 return;
283
284 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
285 while (!list_empty(&ctl->pp_buckets[idx])) {
286 struct zram_pp_slot *pps;
287
288 pps = list_first_entry(&ctl->pp_buckets[idx],
289 struct zram_pp_slot,
290 entry);
291 release_pp_slot(zram, pps);
292 }
293 }
294
295 kfree(ctl);
296 }
297
298 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
299 u32 index)
300 {
301 struct zram_pp_slot *pps;
302 u32 bid;
303
304 pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN);
305 if (!pps)
306 return false;
307
308 INIT_LIST_HEAD(&pps->entry);
309 pps->index = index;
310
311 bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
312 list_add(&pps->entry, &ctl->pp_buckets[bid]);
313
314 zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
315 return true;
316 }
317
318 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
319 {
320 struct zram_pp_slot *pps = NULL;
321 s32 idx = NUM_PP_BUCKETS - 1;
322
323 /* The higher the bucket id, the more beneficial slot post-processing is */
324 while (idx >= 0) {
325 pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
326 struct zram_pp_slot,
327 entry);
328 if (pps)
329 break;
330
331 idx--;
332 }
333 return pps;
334 }
335 #endif
336
337 static inline void zram_fill_page(void *ptr, unsigned long len,
338 unsigned long value)
339 {
340 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
341 memset_l(ptr, value, len / sizeof(unsigned long));
342 }
343
344 static bool page_same_filled(void *ptr, unsigned long *element)
345 {
346 unsigned long *page;
347 unsigned long val;
348 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
349
350 page = (unsigned long *)ptr;
351 val = page[0];
352
353 if (val != page[last_pos])
354 return false;
355
356 for (pos = 1; pos < last_pos; pos++) {
357 if (val != page[pos])
358 return false;
359 }
360
361 *element = val;
362
363 return true;
364 }
365
366 static ssize_t initstate_show(struct device *dev,
367 struct device_attribute *attr, char *buf)
368 {
369 u32 val;
370 struct zram *zram = dev_to_zram(dev);
371
372 down_read(&zram->init_lock);
373 val = init_done(zram);
374 up_read(&zram->init_lock);
375
376 return sysfs_emit(buf, "%u\n", val);
377 }
378
379 static ssize_t disksize_show(struct device *dev,
380 struct device_attribute *attr, char *buf)
381 {
382 struct zram *zram = dev_to_zram(dev);
383
384 return sysfs_emit(buf, "%llu\n", zram->disksize);
385 }
386
387 static ssize_t mem_limit_store(struct device *dev,
388 struct device_attribute *attr, const char *buf, size_t len)
389 {
390 u64 limit;
391 char *tmp;
392 struct zram *zram = dev_to_zram(dev);
393
394 limit = memparse(buf, &tmp);
395 if (buf == tmp) /* no chars parsed, invalid input */
396 return -EINVAL;
397
398 down_write(&zram->init_lock);
399 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
400 up_write(&zram->init_lock);
401
402 return len;
403 }
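
/*
 * Usage sketch (illustrative, assuming the usual zram sysfs layout): the
 * value is parsed with memparse(), so the common K/M/G suffixes work:
 *
 *	echo 1G > /sys/block/zram0/mem_limit	(cap the pool at 1 GiB)
 *	echo 0 > /sys/block/zram0/mem_limit	(0 disables the limit)
 */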
404
405 static ssize_t mem_used_max_store(struct device *dev,
406 struct device_attribute *attr, const char *buf, size_t len)
407 {
408 int err;
409 unsigned long val;
410 struct zram *zram = dev_to_zram(dev);
411
412 err = kstrtoul(buf, 10, &val);
413 if (err || val != 0)
414 return -EINVAL;
415
416 down_read(&zram->init_lock);
417 if (init_done(zram)) {
418 atomic_long_set(&zram->stats.max_used_pages,
419 zs_get_total_pages(zram->mem_pool));
420 }
421 up_read(&zram->init_lock);
422
423 return len;
424 }
425
426 /*
427 * Mark all pages which are older than or equal to cutoff as IDLE.
428 * Callers should hold the zram init lock in read mode
429 */
430 static void mark_idle(struct zram *zram, ktime_t cutoff)
431 {
432 int is_idle = 1;
433 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
434 int index;
435
436 for (index = 0; index < nr_pages; index++) {
437 /*
438 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
439 * post-processing (recompress, writeback) happens to the
440 * ZRAM_SAME slot.
441 *
442 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
443 */
444 zram_slot_lock(zram, index);
445 if (!zram_allocated(zram, index) ||
446 zram_test_flag(zram, index, ZRAM_WB) ||
447 zram_test_flag(zram, index, ZRAM_SAME)) {
448 zram_slot_unlock(zram, index);
449 continue;
450 }
451
452 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
453 is_idle = !cutoff ||
454 ktime_after(cutoff, zram->table[index].ac_time);
455 #endif
456 if (is_idle)
457 zram_set_flag(zram, index, ZRAM_IDLE);
458 else
459 zram_clear_flag(zram, index, ZRAM_IDLE);
460 zram_slot_unlock(zram, index);
461 }
462 }
463
464 static ssize_t idle_store(struct device *dev,
465 struct device_attribute *attr, const char *buf, size_t len)
466 {
467 struct zram *zram = dev_to_zram(dev);
468 ktime_t cutoff_time = 0;
469 ssize_t rv = -EINVAL;
470
471 if (!sysfs_streq(buf, "all")) {
472 /*
473 * If it did not parse as 'all', try to treat it as an integer
474 * when we have memory tracking enabled.
475 */
476 u64 age_sec;
477
478 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec))
479 cutoff_time = ktime_sub(ktime_get_boottime(),
480 ns_to_ktime(age_sec * NSEC_PER_SEC));
481 else
482 goto out;
483 }
484
485 down_read(&zram->init_lock);
486 if (!init_done(zram))
487 goto out_unlock;
488
489 /*
490 * A cutoff_time of 0 marks everything as idle; this is the
491 * "all" behavior.
492 */
493 mark_idle(zram, cutoff_time);
494 rv = len;
495
496 out_unlock:
497 up_read(&zram->init_lock);
498 out:
499 return rv;
500 }
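
/*
 * Usage sketch (illustrative, assuming the usual zram sysfs layout):
 *
 *	echo all > /sys/block/zram0/idle	(mark every eligible slot idle)
 *	echo 3600 > /sys/block/zram0/idle	(slots not accessed for at least
 *						 an hour; needs
 *						 CONFIG_ZRAM_TRACK_ENTRY_ACTIME)
 */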
501
502 #ifdef CONFIG_ZRAM_WRITEBACK
503 static ssize_t writeback_limit_enable_store(struct device *dev,
504 struct device_attribute *attr, const char *buf, size_t len)
505 {
506 struct zram *zram = dev_to_zram(dev);
507 u64 val;
508 ssize_t ret = -EINVAL;
509
510 if (kstrtoull(buf, 10, &val))
511 return ret;
512
513 down_read(&zram->init_lock);
514 spin_lock(&zram->wb_limit_lock);
515 zram->wb_limit_enable = val;
516 spin_unlock(&zram->wb_limit_lock);
517 up_read(&zram->init_lock);
518 ret = len;
519
520 return ret;
521 }
522
523 static ssize_t writeback_limit_enable_show(struct device *dev,
524 struct device_attribute *attr, char *buf)
525 {
526 bool val;
527 struct zram *zram = dev_to_zram(dev);
528
529 down_read(&zram->init_lock);
530 spin_lock(&zram->wb_limit_lock);
531 val = zram->wb_limit_enable;
532 spin_unlock(&zram->wb_limit_lock);
533 up_read(&zram->init_lock);
534
535 return sysfs_emit(buf, "%d\n", val);
536 }
537
538 static ssize_t writeback_limit_store(struct device *dev,
539 struct device_attribute *attr, const char *buf, size_t len)
540 {
541 struct zram *zram = dev_to_zram(dev);
542 u64 val;
543 ssize_t ret = -EINVAL;
544
545 if (kstrtoull(buf, 10, &val))
546 return ret;
547
548 down_read(&zram->init_lock);
549 spin_lock(&zram->wb_limit_lock);
550 zram->bd_wb_limit = val;
551 spin_unlock(&zram->wb_limit_lock);
552 up_read(&zram->init_lock);
553 ret = len;
554
555 return ret;
556 }
557
558 static ssize_t writeback_limit_show(struct device *dev,
559 struct device_attribute *attr, char *buf)
560 {
561 u64 val;
562 struct zram *zram = dev_to_zram(dev);
563
564 down_read(&zram->init_lock);
565 spin_lock(&zram->wb_limit_lock);
566 val = zram->bd_wb_limit;
567 spin_unlock(&zram->wb_limit_lock);
568 up_read(&zram->init_lock);
569
570 return sysfs_emit(buf, "%llu\n", val);
571 }
572
573 static void reset_bdev(struct zram *zram)
574 {
575 if (!zram->backing_dev)
576 return;
577
578 /* hope filp_close flushes all outstanding IO */
579 filp_close(zram->backing_dev, NULL);
580 zram->backing_dev = NULL;
581 zram->bdev = NULL;
582 zram->disk->fops = &zram_devops;
583 kvfree(zram->bitmap);
584 zram->bitmap = NULL;
585 }
586
587 static ssize_t backing_dev_show(struct device *dev,
588 struct device_attribute *attr, char *buf)
589 {
590 struct file *file;
591 struct zram *zram = dev_to_zram(dev);
592 char *p;
593 ssize_t ret;
594
595 down_read(&zram->init_lock);
596 file = zram->backing_dev;
597 if (!file) {
598 memcpy(buf, "none\n", 5);
599 up_read(&zram->init_lock);
600 return 5;
601 }
602
603 p = file_path(file, buf, PAGE_SIZE - 1);
604 if (IS_ERR(p)) {
605 ret = PTR_ERR(p);
606 goto out;
607 }
608
609 ret = strlen(p);
610 memmove(buf, p, ret);
611 buf[ret++] = '\n';
612 out:
613 up_read(&zram->init_lock);
614 return ret;
615 }
616
617 static ssize_t backing_dev_store(struct device *dev,
618 struct device_attribute *attr, const char *buf, size_t len)
619 {
620 char *file_name;
621 size_t sz;
622 struct file *backing_dev = NULL;
623 struct inode *inode;
624 unsigned int bitmap_sz;
625 unsigned long nr_pages, *bitmap = NULL;
626 int err;
627 struct zram *zram = dev_to_zram(dev);
628
629 file_name = kmalloc(PATH_MAX, GFP_KERNEL);
630 if (!file_name)
631 return -ENOMEM;
632
633 down_write(&zram->init_lock);
634 if (init_done(zram)) {
635 pr_info("Can't setup backing device for initialized device\n");
636 err = -EBUSY;
637 goto out;
638 }
639
640 strscpy(file_name, buf, PATH_MAX);
641 /* ignore trailing newline */
642 sz = strlen(file_name);
643 if (sz > 0 && file_name[sz - 1] == '\n')
644 file_name[sz - 1] = 0x00;
645
646 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
647 if (IS_ERR(backing_dev)) {
648 err = PTR_ERR(backing_dev);
649 backing_dev = NULL;
650 goto out;
651 }
652
653 inode = backing_dev->f_mapping->host;
654
655 /* Only block devices are supported at the moment */
656 if (!S_ISBLK(inode->i_mode)) {
657 err = -ENOTBLK;
658 goto out;
659 }
660
661 nr_pages = i_size_read(inode) >> PAGE_SHIFT;
662 /* Refuse to use a zero-sized device (also prevents self-reference) */
663 if (!nr_pages) {
664 err = -EINVAL;
665 goto out;
666 }
667
668 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
669 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
670 if (!bitmap) {
671 err = -ENOMEM;
672 goto out;
673 }
674
675 reset_bdev(zram);
676
677 zram->bdev = I_BDEV(inode);
678 zram->backing_dev = backing_dev;
679 zram->bitmap = bitmap;
680 zram->nr_pages = nr_pages;
681 up_write(&zram->init_lock);
682
683 pr_info("setup backing device %s\n", file_name);
684 kfree(file_name);
685
686 return len;
687 out:
688 kvfree(bitmap);
689
690 if (backing_dev)
691 filp_close(backing_dev, NULL);
692
693 up_write(&zram->init_lock);
694
695 kfree(file_name);
696
697 return err;
698 }
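
/*
 * Usage sketch (illustrative): the backing device must be configured before
 * disksize is set, and only block devices are accepted:
 *
 *	echo /dev/sdXn > /sys/block/zram0/backing_dev
 *	cat /sys/block/zram0/backing_dev	(prints the path, or "none")
 */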
699
700 static unsigned long alloc_block_bdev(struct zram *zram)
701 {
702 unsigned long blk_idx = 1;
703 retry:
704 /* skip bit 0 so a valid block index is never confused with zram.handle == 0 */
705 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
706 if (blk_idx == zram->nr_pages)
707 return 0;
708
709 if (test_and_set_bit(blk_idx, zram->bitmap))
710 goto retry;
711
712 atomic64_inc(&zram->stats.bd_count);
713 return blk_idx;
714 }
715
716 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
717 {
718 int was_set;
719
720 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
721 WARN_ON_ONCE(!was_set);
722 atomic64_dec(&zram->stats.bd_count);
723 }
724
725 static void read_from_bdev_async(struct zram *zram, struct page *page,
726 unsigned long entry, struct bio *parent)
727 {
728 struct bio *bio;
729
730 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
731 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
732 __bio_add_page(bio, page, PAGE_SIZE, 0);
733 bio_chain(bio, parent);
734 submit_bio(bio);
735 }
736
737 static int zram_writeback_slots(struct zram *zram, struct zram_pp_ctl *ctl)
738 {
739 unsigned long blk_idx = 0;
740 struct page *page = NULL;
741 struct zram_pp_slot *pps;
742 struct bio_vec bio_vec;
743 struct bio bio;
744 int ret = 0, err;
745 u32 index;
746
747 page = alloc_page(GFP_KERNEL);
748 if (!page)
749 return -ENOMEM;
750
751 while ((pps = select_pp_slot(ctl))) {
752 spin_lock(&zram->wb_limit_lock);
753 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
754 spin_unlock(&zram->wb_limit_lock);
755 ret = -EIO;
756 break;
757 }
758 spin_unlock(&zram->wb_limit_lock);
759
760 if (!blk_idx) {
761 blk_idx = alloc_block_bdev(zram);
762 if (!blk_idx) {
763 ret = -ENOSPC;
764 break;
765 }
766 }
767
768 index = pps->index;
769 zram_slot_lock(zram, index);
770 /*
771 * scan_slots() sets ZRAM_PP_SLOT and releases the slot lock, so
772 * slots can change in the meantime. If slots are accessed or
773 * freed they lose ZRAM_PP_SLOT flag and hence we don't
774 * post-process them.
775 */
776 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
777 goto next;
778 if (zram_read_from_zspool(zram, page, index))
779 goto next;
780 zram_slot_unlock(zram, index);
781
782 bio_init(&bio, zram->bdev, &bio_vec, 1,
783 REQ_OP_WRITE | REQ_SYNC);
784 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
785 __bio_add_page(&bio, page, PAGE_SIZE, 0);
786
787 /*
788 * XXX: A single page IO is inefficient for writes,
789 * but it is not a bad starting point.
790 */
791 err = submit_bio_wait(&bio);
792 if (err) {
793 release_pp_slot(zram, pps);
794 /*
795 * BIO errors are not fatal, we continue and simply
796 * attempt to writeback the remaining objects (pages).
797 * At the same time we need to signal user-space that
798 * some writes (at least one, but also could be all of
799 * them) were not successful and we do so by returning
800 * the most recent BIO error.
801 */
802 ret = err;
803 continue;
804 }
805
806 atomic64_inc(&zram->stats.bd_writes);
807 zram_slot_lock(zram, index);
808 /*
809 * Same as above, we release the slot lock during writeback so
810 * the slot can change under us: slot_free(), or slot_free() followed by
811 * reallocation (zram_write_page()). In both cases the slot loses the
812 * ZRAM_PP_SLOT flag. No concurrent post-processing can set
813 * ZRAM_PP_SLOT on such slots until current post-processing
814 * finishes.
815 */
816 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
817 goto next;
818
819 zram_free_page(zram, index);
820 zram_set_flag(zram, index, ZRAM_WB);
821 zram_set_handle(zram, index, blk_idx);
822 blk_idx = 0;
823 atomic64_inc(&zram->stats.pages_stored);
824 spin_lock(&zram->wb_limit_lock);
825 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
826 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
827 spin_unlock(&zram->wb_limit_lock);
828 next:
829 zram_slot_unlock(zram, index);
830 release_pp_slot(zram, pps);
831
832 cond_resched();
833 }
834
835 if (blk_idx)
836 free_block_bdev(zram, blk_idx);
837 if (page)
838 __free_page(page);
839
840 return ret;
841 }
842
843 #define PAGE_WRITEBACK 0
844 #define HUGE_WRITEBACK (1 << 0)
845 #define IDLE_WRITEBACK (1 << 1)
846 #define INCOMPRESSIBLE_WRITEBACK (1 << 2)
847
848 static int parse_page_index(char *val, unsigned long nr_pages,
849 unsigned long *lo, unsigned long *hi)
850 {
851 int ret;
852
853 ret = kstrtoul(val, 10, lo);
854 if (ret)
855 return ret;
856 if (*lo >= nr_pages)
857 return -ERANGE;
858 *hi = *lo + 1;
859 return 0;
860 }
861
862 static int parse_page_indexes(char *val, unsigned long nr_pages,
863 unsigned long *lo, unsigned long *hi)
864 {
865 char *delim;
866 int ret;
867
868 delim = strchr(val, '-');
869 if (!delim)
870 return -EINVAL;
871
872 *delim = 0x00;
873 ret = kstrtoul(val, 10, lo);
874 if (ret)
875 return ret;
876 if (*lo >= nr_pages)
877 return -ERANGE;
878
879 ret = kstrtoul(delim + 1, 10, hi);
880 if (ret)
881 return ret;
882 if (*hi >= nr_pages || *lo > *hi)
883 return -ERANGE;
884 *hi += 1;
885 return 0;
886 }
887
888 static int parse_mode(char *val, u32 *mode)
889 {
890 *mode = 0;
891
892 if (!strcmp(val, "idle"))
893 *mode = IDLE_WRITEBACK;
894 if (!strcmp(val, "huge"))
895 *mode = HUGE_WRITEBACK;
896 if (!strcmp(val, "huge_idle"))
897 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
898 if (!strcmp(val, "incompressible"))
899 *mode = INCOMPRESSIBLE_WRITEBACK;
900
901 if (*mode == 0)
902 return -EINVAL;
903 return 0;
904 }
905
906 static int scan_slots_for_writeback(struct zram *zram, u32 mode,
907 unsigned long lo, unsigned long hi,
908 struct zram_pp_ctl *ctl)
909 {
910 u32 index = lo;
911
912 while (index < hi) {
913 bool ok = true;
914
915 zram_slot_lock(zram, index);
916 if (!zram_allocated(zram, index))
917 goto next;
918
919 if (zram_test_flag(zram, index, ZRAM_WB) ||
920 zram_test_flag(zram, index, ZRAM_SAME))
921 goto next;
922
923 if (mode & IDLE_WRITEBACK &&
924 !zram_test_flag(zram, index, ZRAM_IDLE))
925 goto next;
926 if (mode & HUGE_WRITEBACK &&
927 !zram_test_flag(zram, index, ZRAM_HUGE))
928 goto next;
929 if (mode & INCOMPRESSIBLE_WRITEBACK &&
930 !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
931 goto next;
932
933 ok = place_pp_slot(zram, ctl, index);
934 next:
935 zram_slot_unlock(zram, index);
936 if (!ok)
937 break;
938 index++;
939 }
940
941 return 0;
942 }
943
944 static ssize_t writeback_store(struct device *dev,
945 struct device_attribute *attr,
946 const char *buf, size_t len)
947 {
948 struct zram *zram = dev_to_zram(dev);
949 u64 nr_pages = zram->disksize >> PAGE_SHIFT;
950 unsigned long lo = 0, hi = nr_pages;
951 struct zram_pp_ctl *ctl = NULL;
952 char *args, *param, *val;
953 ssize_t ret = len;
954 int err, mode = 0;
955
956 down_read(&zram->init_lock);
957 if (!init_done(zram)) {
958 up_read(&zram->init_lock);
959 return -EINVAL;
960 }
961
962 /* Do not permit concurrent post-processing actions. */
963 if (atomic_xchg(&zram->pp_in_progress, 1)) {
964 up_read(&zram->init_lock);
965 return -EAGAIN;
966 }
967
968 if (!zram->backing_dev) {
969 ret = -ENODEV;
970 goto release_init_lock;
971 }
972
973 ctl = init_pp_ctl();
974 if (!ctl) {
975 ret = -ENOMEM;
976 goto release_init_lock;
977 }
978
979 args = skip_spaces(buf);
980 while (*args) {
981 args = next_arg(args, &param, &val);
982
983 /*
984 * Workaround to support the old writeback interface.
985 *
986 * The old writeback interface has a minor inconsistency and
987 * requires key=value only for page_index parameter, while the
988 * writeback mode is a valueless parameter.
989 *
990 * This is not the case anymore and now all parameters are
991 * required to have values, however, we need to support the
992 * legacy writeback interface format so we check if we can
993 * recognize a valueless parameter as the (legacy) writeback
994 * mode.
995 */
996 if (!val || !*val) {
997 err = parse_mode(param, &mode);
998 if (err) {
999 ret = err;
1000 goto release_init_lock;
1001 }
1002
1003 scan_slots_for_writeback(zram, mode, lo, hi, ctl);
1004 break;
1005 }
1006
1007 if (!strcmp(param, "type")) {
1008 err = parse_mode(val, &mode);
1009 if (err) {
1010 ret = err;
1011 goto release_init_lock;
1012 }
1013
1014 scan_slots_for_writeback(zram, mode, lo, hi, ctl);
1015 break;
1016 }
1017
1018 if (!strcmp(param, "page_index")) {
1019 err = parse_page_index(val, nr_pages, &lo, &hi);
1020 if (err) {
1021 ret = err;
1022 goto release_init_lock;
1023 }
1024
1025 scan_slots_for_writeback(zram, mode, lo, hi, ctl);
1026 continue;
1027 }
1028
1029 if (!strcmp(param, "page_indexes")) {
1030 err = parse_page_indexes(val, nr_pages, &lo, &hi);
1031 if (err) {
1032 ret = err;
1033 goto release_init_lock;
1034 }
1035
1036 scan_slots_for_writeback(zram, mode, lo, hi, ctl);
1037 continue;
1038 }
1039 }
1040
1041 err = zram_writeback_slots(zram, ctl);
1042 if (err)
1043 ret = err;
1044
1045 release_init_lock:
1046 release_pp_ctl(zram, ctl);
1047 atomic_set(&zram->pp_in_progress, 0);
1048 up_read(&zram->init_lock);
1049
1050 return ret;
1051 }
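
/*
 * Usage sketch (illustrative), matching the parameters parsed above:
 *
 *	echo idle > /sys/block/zram0/writeback		(legacy valueless form)
 *	echo type=huge_idle > /sys/block/zram0/writeback
 *	echo page_index=42 > /sys/block/zram0/writeback
 *	echo page_indexes=100-200 > /sys/block/zram0/writeback
 *
 * A backing device must be configured first, otherwise -ENODEV is returned.
 */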
1052
1053 struct zram_work {
1054 struct work_struct work;
1055 struct zram *zram;
1056 unsigned long entry;
1057 struct page *page;
1058 int error;
1059 };
1060
1061 static void zram_sync_read(struct work_struct *work)
1062 {
1063 struct zram_work *zw = container_of(work, struct zram_work, work);
1064 struct bio_vec bv;
1065 struct bio bio;
1066
1067 bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
1068 bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
1069 __bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
1070 zw->error = submit_bio_wait(&bio);
1071 }
1072
1073 /*
1074 * The block layer wants one ->submit_bio to be active at a time, so if we use
1075 * chained IO with the parent IO in the same context, it deadlocks. To avoid that,
1076 * use a worker thread context.
1077 */
1078 static int read_from_bdev_sync(struct zram *zram, struct page *page,
1079 unsigned long entry)
1080 {
1081 struct zram_work work;
1082
1083 work.page = page;
1084 work.zram = zram;
1085 work.entry = entry;
1086
1087 INIT_WORK_ONSTACK(&work.work, zram_sync_read);
1088 queue_work(system_dfl_wq, &work.work);
1089 flush_work(&work.work);
1090 destroy_work_on_stack(&work.work);
1091
1092 return work.error;
1093 }
1094
1095 static int read_from_bdev(struct zram *zram, struct page *page,
1096 unsigned long entry, struct bio *parent)
1097 {
1098 atomic64_inc(&zram->stats.bd_reads);
1099 if (!parent) {
1100 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
1101 return -EIO;
1102 return read_from_bdev_sync(zram, page, entry);
1103 }
1104 read_from_bdev_async(zram, page, entry, parent);
1105 return 0;
1106 }
1107 #else
1108 static inline void reset_bdev(struct zram *zram) {};
1109 static int read_from_bdev(struct zram *zram, struct page *page,
1110 unsigned long entry, struct bio *parent)
1111 {
1112 return -EIO;
1113 }
1114
1115 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
1116 #endif
1117
1118 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1119
1120 static struct dentry *zram_debugfs_root;
1121
1122 static void zram_debugfs_create(void)
1123 {
1124 zram_debugfs_root = debugfs_create_dir("zram", NULL);
1125 }
1126
1127 static void zram_debugfs_destroy(void)
1128 {
1129 debugfs_remove_recursive(zram_debugfs_root);
1130 }
1131
1132 static ssize_t read_block_state(struct file *file, char __user *buf,
1133 size_t count, loff_t *ppos)
1134 {
1135 char *kbuf;
1136 ssize_t index, written = 0;
1137 struct zram *zram = file->private_data;
1138 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
1139 struct timespec64 ts;
1140
1141 kbuf = kvmalloc(count, GFP_KERNEL);
1142 if (!kbuf)
1143 return -ENOMEM;
1144
1145 down_read(&zram->init_lock);
1146 if (!init_done(zram)) {
1147 up_read(&zram->init_lock);
1148 kvfree(kbuf);
1149 return -EINVAL;
1150 }
1151
1152 for (index = *ppos; index < nr_pages; index++) {
1153 int copied;
1154
1155 zram_slot_lock(zram, index);
1156 if (!zram_allocated(zram, index))
1157 goto next;
1158
1159 ts = ktime_to_timespec64(zram->table[index].ac_time);
1160 copied = snprintf(kbuf + written, count,
1161 "%12zd %12lld.%06lu %c%c%c%c%c%c\n",
1162 index, (s64)ts.tv_sec,
1163 ts.tv_nsec / NSEC_PER_USEC,
1164 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
1165 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
1166 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
1167 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
1168 zram_get_priority(zram, index) ? 'r' : '.',
1169 zram_test_flag(zram, index,
1170 ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
1171
1172 if (count <= copied) {
1173 zram_slot_unlock(zram, index);
1174 break;
1175 }
1176 written += copied;
1177 count -= copied;
1178 next:
1179 zram_slot_unlock(zram, index);
1180 *ppos += 1;
1181 }
1182
1183 up_read(&zram->init_lock);
1184 if (copy_to_user(buf, kbuf, written))
1185 written = -EFAULT;
1186 kvfree(kbuf);
1187
1188 return written;
1189 }
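
/*
 * Illustrative output line (one per allocated slot): index, last access
 * time in seconds.microseconds, then the s/w/h/i/r/n flag columns produced
 * by the format string above, e.g.
 *
 *	         300        75.033841 .wh...
 */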
1190
1191 static const struct file_operations proc_zram_block_state_op = {
1192 .open = simple_open,
1193 .read = read_block_state,
1194 .llseek = default_llseek,
1195 };
1196
1197 static void zram_debugfs_register(struct zram *zram)
1198 {
1199 if (!zram_debugfs_root)
1200 return;
1201
1202 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
1203 zram_debugfs_root);
1204 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
1205 zram, &proc_zram_block_state_op);
1206 }
1207
1208 static void zram_debugfs_unregister(struct zram *zram)
1209 {
1210 debugfs_remove_recursive(zram->debugfs_dir);
1211 }
1212 #else
1213 static void zram_debugfs_create(void) {};
1214 static void zram_debugfs_destroy(void) {};
1215 static void zram_debugfs_register(struct zram *zram) {};
1216 static void zram_debugfs_unregister(struct zram *zram) {};
1217 #endif
1218
1219 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
1220 {
1221 /* Do not free statically defined compression algorithms */
1222 if (zram->comp_algs[prio] != default_compressor)
1223 kfree(zram->comp_algs[prio]);
1224
1225 zram->comp_algs[prio] = alg;
1226 }
1227
1228 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
1229 {
1230 char *compressor;
1231 size_t sz;
1232
1233 sz = strlen(buf);
1234 if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
1235 return -E2BIG;
1236
1237 compressor = kstrdup(buf, GFP_KERNEL);
1238 if (!compressor)
1239 return -ENOMEM;
1240
1241 /* ignore trailing newline */
1242 if (sz > 0 && compressor[sz - 1] == '\n')
1243 compressor[sz - 1] = 0x00;
1244
1245 if (!zcomp_available_algorithm(compressor)) {
1246 kfree(compressor);
1247 return -EINVAL;
1248 }
1249
1250 down_write(&zram->init_lock);
1251 if (init_done(zram)) {
1252 up_write(&zram->init_lock);
1253 kfree(compressor);
1254 pr_info("Can't change algorithm for initialized device\n");
1255 return -EBUSY;
1256 }
1257
1258 comp_algorithm_set(zram, prio, compressor);
1259 up_write(&zram->init_lock);
1260 return 0;
1261 }
1262
1263 static void comp_params_reset(struct zram *zram, u32 prio)
1264 {
1265 struct zcomp_params *params = &zram->params[prio];
1266
1267 vfree(params->dict);
1268 params->level = ZCOMP_PARAM_NOT_SET;
1269 params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
1270 params->dict_sz = 0;
1271 params->dict = NULL;
1272 }
1273
1274 static int comp_params_store(struct zram *zram, u32 prio, s32 level,
1275 const char *dict_path,
1276 struct deflate_params *deflate_params)
1277 {
1278 ssize_t sz = 0;
1279
1280 comp_params_reset(zram, prio);
1281
1282 if (dict_path) {
1283 sz = kernel_read_file_from_path(dict_path, 0,
1284 &zram->params[prio].dict,
1285 INT_MAX,
1286 NULL,
1287 READING_POLICY);
1288 if (sz < 0)
1289 return -EINVAL;
1290 }
1291
1292 zram->params[prio].dict_sz = sz;
1293 zram->params[prio].level = level;
1294 zram->params[prio].deflate.winbits = deflate_params->winbits;
1295 return 0;
1296 }
1297
1298 static ssize_t algorithm_params_store(struct device *dev,
1299 struct device_attribute *attr,
1300 const char *buf,
1301 size_t len)
1302 {
1303 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
1304 char *args, *param, *val, *algo = NULL, *dict_path = NULL;
1305 struct deflate_params deflate_params;
1306 struct zram *zram = dev_to_zram(dev);
1307 int ret;
1308
1309 deflate_params.winbits = ZCOMP_PARAM_NOT_SET;
1310
1311 args = skip_spaces(buf);
1312 while (*args) {
1313 args = next_arg(args, &param, &val);
1314
1315 if (!val || !*val)
1316 return -EINVAL;
1317
1318 if (!strcmp(param, "priority")) {
1319 ret = kstrtoint(val, 10, &prio);
1320 if (ret)
1321 return ret;
1322 continue;
1323 }
1324
1325 if (!strcmp(param, "level")) {
1326 ret = kstrtoint(val, 10, &level);
1327 if (ret)
1328 return ret;
1329 continue;
1330 }
1331
1332 if (!strcmp(param, "algo")) {
1333 algo = val;
1334 continue;
1335 }
1336
1337 if (!strcmp(param, "dict")) {
1338 dict_path = val;
1339 continue;
1340 }
1341
1342 if (!strcmp(param, "deflate.winbits")) {
1343 ret = kstrtoint(val, 10, &deflate_params.winbits);
1344 if (ret)
1345 return ret;
1346 continue;
1347 }
1348 }
1349
1350 /* Look up the priority by algorithm name */
1351 if (algo) {
1352 s32 p;
1353
1354 prio = -EINVAL;
1355 for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) {
1356 if (!zram->comp_algs[p])
1357 continue;
1358
1359 if (!strcmp(zram->comp_algs[p], algo)) {
1360 prio = p;
1361 break;
1362 }
1363 }
1364 }
1365
1366 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
1367 return -EINVAL;
1368
1369 ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
1370 return ret ? ret : len;
1371 }
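
/*
 * Usage sketch (illustrative), matching the parameters parsed above:
 *
 *	echo "priority=0 level=9" > /sys/block/zram0/algorithm_params
 *	echo "algo=zstd level=8 dict=/path/to/dict" > /sys/block/zram0/algorithm_params
 *	echo "algo=deflate deflate.winbits=15" > /sys/block/zram0/algorithm_params
 *
 * "algo=" selects the priority slot by a previously configured algorithm
 * name; otherwise "priority=" selects it directly.
 */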
1372
1373 static ssize_t comp_algorithm_show(struct device *dev,
1374 struct device_attribute *attr,
1375 char *buf)
1376 {
1377 struct zram *zram = dev_to_zram(dev);
1378 ssize_t sz;
1379
1380 down_read(&zram->init_lock);
1381 sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
1382 up_read(&zram->init_lock);
1383 return sz;
1384 }
1385
1386 static ssize_t comp_algorithm_store(struct device *dev,
1387 struct device_attribute *attr,
1388 const char *buf,
1389 size_t len)
1390 {
1391 struct zram *zram = dev_to_zram(dev);
1392 int ret;
1393
1394 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
1395 return ret ? ret : len;
1396 }
1397
1398 #ifdef CONFIG_ZRAM_MULTI_COMP
1399 static ssize_t recomp_algorithm_show(struct device *dev,
1400 struct device_attribute *attr,
1401 char *buf)
1402 {
1403 struct zram *zram = dev_to_zram(dev);
1404 ssize_t sz = 0;
1405 u32 prio;
1406
1407 down_read(&zram->init_lock);
1408 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
1409 if (!zram->comp_algs[prio])
1410 continue;
1411
1412 sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
1413 sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
1414 }
1415 up_read(&zram->init_lock);
1416 return sz;
1417 }
1418
1419 static ssize_t recomp_algorithm_store(struct device *dev,
1420 struct device_attribute *attr,
1421 const char *buf,
1422 size_t len)
1423 {
1424 struct zram *zram = dev_to_zram(dev);
1425 int prio = ZRAM_SECONDARY_COMP;
1426 char *args, *param, *val;
1427 char *alg = NULL;
1428 int ret;
1429
1430 args = skip_spaces(buf);
1431 while (*args) {
1432 args = next_arg(args, &param, &val);
1433
1434 if (!val || !*val)
1435 return -EINVAL;
1436
1437 if (!strcmp(param, "algo")) {
1438 alg = val;
1439 continue;
1440 }
1441
1442 if (!strcmp(param, "priority")) {
1443 ret = kstrtoint(val, 10, &prio);
1444 if (ret)
1445 return ret;
1446 continue;
1447 }
1448 }
1449
1450 if (!alg)
1451 return -EINVAL;
1452
1453 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
1454 return -EINVAL;
1455
1456 ret = __comp_algorithm_store(zram, prio, alg);
1457 return ret ? ret : len;
1458 }
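
/*
 * Usage sketch (illustrative): register a secondary (recompression)
 * algorithm, optionally at an explicit priority slot:
 *
 *	echo "algo=zstd" > /sys/block/zram0/recomp_algorithm
 *	echo "algo=lz4 priority=2" > /sys/block/zram0/recomp_algorithm
 */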
1459 #endif
1460
1461 static ssize_t compact_store(struct device *dev,
1462 struct device_attribute *attr, const char *buf, size_t len)
1463 {
1464 struct zram *zram = dev_to_zram(dev);
1465
1466 down_read(&zram->init_lock);
1467 if (!init_done(zram)) {
1468 up_read(&zram->init_lock);
1469 return -EINVAL;
1470 }
1471
1472 zs_compact(zram->mem_pool);
1473 up_read(&zram->init_lock);
1474
1475 return len;
1476 }
1477
1478 static ssize_t io_stat_show(struct device *dev,
1479 struct device_attribute *attr, char *buf)
1480 {
1481 struct zram *zram = dev_to_zram(dev);
1482 ssize_t ret;
1483
1484 down_read(&zram->init_lock);
1485 ret = sysfs_emit(buf,
1486 "%8llu %8llu 0 %8llu\n",
1487 (u64)atomic64_read(&zram->stats.failed_reads),
1488 (u64)atomic64_read(&zram->stats.failed_writes),
1489 (u64)atomic64_read(&zram->stats.notify_free));
1490 up_read(&zram->init_lock);
1491
1492 return ret;
1493 }
1494
1495 static ssize_t mm_stat_show(struct device *dev,
1496 struct device_attribute *attr, char *buf)
1497 {
1498 struct zram *zram = dev_to_zram(dev);
1499 struct zs_pool_stats pool_stats;
1500 u64 orig_size, mem_used = 0;
1501 long max_used;
1502 ssize_t ret;
1503
1504 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1505
1506 down_read(&zram->init_lock);
1507 if (init_done(zram)) {
1508 mem_used = zs_get_total_pages(zram->mem_pool);
1509 zs_pool_stats(zram->mem_pool, &pool_stats);
1510 }
1511
1512 orig_size = atomic64_read(&zram->stats.pages_stored);
1513 max_used = atomic_long_read(&zram->stats.max_used_pages);
1514
1515 ret = sysfs_emit(buf,
1516 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1517 orig_size << PAGE_SHIFT,
1518 (u64)atomic64_read(&zram->stats.compr_data_size),
1519 mem_used << PAGE_SHIFT,
1520 zram->limit_pages << PAGE_SHIFT,
1521 max_used << PAGE_SHIFT,
1522 (u64)atomic64_read(&zram->stats.same_pages),
1523 atomic_long_read(&pool_stats.pages_compacted),
1524 (u64)atomic64_read(&zram->stats.huge_pages),
1525 (u64)atomic64_read(&zram->stats.huge_pages_since));
1526 up_read(&zram->init_lock);
1527
1528 return ret;
1529 }
1530
1531 #ifdef CONFIG_ZRAM_WRITEBACK
1532 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1533 static ssize_t bd_stat_show(struct device *dev,
1534 struct device_attribute *attr, char *buf)
1535 {
1536 struct zram *zram = dev_to_zram(dev);
1537 ssize_t ret;
1538
1539 down_read(&zram->init_lock);
1540 ret = sysfs_emit(buf,
1541 "%8llu %8llu %8llu\n",
1542 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1543 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1544 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1545 up_read(&zram->init_lock);
1546
1547 return ret;
1548 }
1549 #endif
1550
1551 static ssize_t debug_stat_show(struct device *dev,
1552 struct device_attribute *attr, char *buf)
1553 {
1554 int version = 1;
1555 struct zram *zram = dev_to_zram(dev);
1556 ssize_t ret;
1557
1558 down_read(&zram->init_lock);
1559 ret = sysfs_emit(buf,
1560 "version: %d\n0 %8llu\n",
1561 version,
1562 (u64)atomic64_read(&zram->stats.miss_free));
1563 up_read(&zram->init_lock);
1564
1565 return ret;
1566 }
1567
1568 static DEVICE_ATTR_RO(io_stat);
1569 static DEVICE_ATTR_RO(mm_stat);
1570 #ifdef CONFIG_ZRAM_WRITEBACK
1571 static DEVICE_ATTR_RO(bd_stat);
1572 #endif
1573 static DEVICE_ATTR_RO(debug_stat);
1574
1575 static void zram_meta_free(struct zram *zram, u64 disksize)
1576 {
1577 size_t num_pages = disksize >> PAGE_SHIFT;
1578 size_t index;
1579
1580 if (!zram->table)
1581 return;
1582
1583 /* Free all pages that are still in this zram device */
1584 for (index = 0; index < num_pages; index++)
1585 zram_free_page(zram, index);
1586
1587 zs_destroy_pool(zram->mem_pool);
1588 vfree(zram->table);
1589 zram->table = NULL;
1590 }
1591
1592 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1593 {
1594 size_t num_pages, index;
1595
1596 num_pages = disksize >> PAGE_SHIFT;
1597 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1598 if (!zram->table)
1599 return false;
1600
1601 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1602 if (!zram->mem_pool) {
1603 vfree(zram->table);
1604 zram->table = NULL;
1605 return false;
1606 }
1607
1608 if (!huge_class_size)
1609 huge_class_size = zs_huge_class_size(zram->mem_pool);
1610
1611 for (index = 0; index < num_pages; index++)
1612 zram_slot_lock_init(zram, index);
1613
1614 return true;
1615 }
1616
1617 static void zram_free_page(struct zram *zram, size_t index)
1618 {
1619 unsigned long handle;
1620
1621 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
1622 zram->table[index].ac_time = 0;
1623 #endif
1624
1625 zram_clear_flag(zram, index, ZRAM_IDLE);
1626 zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
1627 zram_clear_flag(zram, index, ZRAM_PP_SLOT);
1628 zram_set_priority(zram, index, 0);
1629
1630 if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1631 zram_clear_flag(zram, index, ZRAM_HUGE);
1632 atomic64_dec(&zram->stats.huge_pages);
1633 }
1634
1635 if (zram_test_flag(zram, index, ZRAM_WB)) {
1636 zram_clear_flag(zram, index, ZRAM_WB);
1637 free_block_bdev(zram, zram_get_handle(zram, index));
1638 goto out;
1639 }
1640
1641 /*
1642 * No memory is allocated for same-element-filled pages.
1643 * Simply clear the same-page flag.
1644 */
1645 if (zram_test_flag(zram, index, ZRAM_SAME)) {
1646 zram_clear_flag(zram, index, ZRAM_SAME);
1647 atomic64_dec(&zram->stats.same_pages);
1648 goto out;
1649 }
1650
1651 handle = zram_get_handle(zram, index);
1652 if (!handle)
1653 return;
1654
1655 zs_free(zram->mem_pool, handle);
1656
1657 atomic64_sub(zram_get_obj_size(zram, index),
1658 &zram->stats.compr_data_size);
1659 out:
1660 atomic64_dec(&zram->stats.pages_stored);
1661 zram_set_handle(zram, index, 0);
1662 zram_set_obj_size(zram, index, 0);
1663 }
1664
1665 static int read_same_filled_page(struct zram *zram, struct page *page,
1666 u32 index)
1667 {
1668 void *mem;
1669
1670 mem = kmap_local_page(page);
1671 zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index));
1672 kunmap_local(mem);
1673 return 0;
1674 }
1675
1676 static int read_incompressible_page(struct zram *zram, struct page *page,
1677 u32 index)
1678 {
1679 unsigned long handle;
1680 void *src, *dst;
1681
1682 handle = zram_get_handle(zram, index);
1683 src = zs_obj_read_begin(zram->mem_pool, handle, NULL);
1684 dst = kmap_local_page(page);
1685 copy_page(dst, src);
1686 kunmap_local(dst);
1687 zs_obj_read_end(zram->mem_pool, handle, src);
1688
1689 return 0;
1690 }
1691
1692 static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
1693 {
1694 struct zcomp_strm *zstrm;
1695 unsigned long handle;
1696 unsigned int size;
1697 void *src, *dst;
1698 int ret, prio;
1699
1700 handle = zram_get_handle(zram, index);
1701 size = zram_get_obj_size(zram, index);
1702 prio = zram_get_priority(zram, index);
1703
1704 zstrm = zcomp_stream_get(zram->comps[prio]);
1705 src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
1706 dst = kmap_local_page(page);
1707 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
1708 kunmap_local(dst);
1709 zs_obj_read_end(zram->mem_pool, handle, src);
1710 zcomp_stream_put(zstrm);
1711
1712 return ret;
1713 }
1714
1715 /*
1716 * Reads (decompresses if needed) a page from zspool (zsmalloc).
1717 * Corresponding ZRAM slot should be locked.
1718 */
1719 static int zram_read_from_zspool(struct zram *zram, struct page *page,
1720 u32 index)
1721 {
1722 if (zram_test_flag(zram, index, ZRAM_SAME) ||
1723 !zram_get_handle(zram, index))
1724 return read_same_filled_page(zram, page, index);
1725
1726 if (!zram_test_flag(zram, index, ZRAM_HUGE))
1727 return read_compressed_page(zram, page, index);
1728 else
1729 return read_incompressible_page(zram, page, index);
1730 }
1731
1732 static int zram_read_page(struct zram *zram, struct page *page, u32 index,
1733 struct bio *parent)
1734 {
1735 int ret;
1736
1737 zram_slot_lock(zram, index);
1738 if (!zram_test_flag(zram, index, ZRAM_WB)) {
1739 /* The slot should be locked throughout the function call */
1740 ret = zram_read_from_zspool(zram, page, index);
1741 zram_slot_unlock(zram, index);
1742 } else {
1743 /*
1744 * The slot should be unlocked before reading from the backing
1745 * device.
1746 */
1747 zram_slot_unlock(zram, index);
1748
1749 ret = read_from_bdev(zram, page, zram_get_handle(zram, index),
1750 parent);
1751 }
1752
1753 /* Should NEVER happen. Return bio error if it does. */
1754 if (WARN_ON(ret < 0))
1755 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1756
1757 return ret;
1758 }
1759
1760 /*
1761 * Use a temporary buffer to decompress the page, as the decompressor
1762 * always expects a full page for the output.
1763 */
1764 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
1765 u32 index, int offset)
1766 {
1767 struct page *page = alloc_page(GFP_NOIO);
1768 int ret;
1769
1770 if (!page)
1771 return -ENOMEM;
1772 ret = zram_read_page(zram, page, index, NULL);
1773 if (likely(!ret))
1774 memcpy_to_bvec(bvec, page_address(page) + offset);
1775 __free_page(page);
1776 return ret;
1777 }
1778
1779 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1780 u32 index, int offset, struct bio *bio)
1781 {
1782 if (is_partial_io(bvec))
1783 return zram_bvec_read_partial(zram, bvec, index, offset);
1784 return zram_read_page(zram, bvec->bv_page, index, bio);
1785 }
1786
1787 static int write_same_filled_page(struct zram *zram, unsigned long fill,
1788 u32 index)
1789 {
1790 zram_slot_lock(zram, index);
1791 zram_free_page(zram, index);
1792 zram_set_flag(zram, index, ZRAM_SAME);
1793 zram_set_handle(zram, index, fill);
1794 zram_slot_unlock(zram, index);
1795
1796 atomic64_inc(&zram->stats.same_pages);
1797 atomic64_inc(&zram->stats.pages_stored);
1798
1799 return 0;
1800 }
1801
1802 static int write_incompressible_page(struct zram *zram, struct page *page,
1803 u32 index)
1804 {
1805 unsigned long handle;
1806 void *src;
1807
1808 /*
1809 * This function is called from a preemptible context, so we don't need
1810 * to do an optimistic allocation and fall back to a pessimistic handle
1811 * allocation, as we do for compressible pages.
1812 */
1813 handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
1814 GFP_NOIO | __GFP_NOWARN |
1815 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
1816 if (IS_ERR_VALUE(handle))
1817 return PTR_ERR((void *)handle);
1818
1819 if (!zram_can_store_page(zram)) {
1820 zs_free(zram->mem_pool, handle);
1821 return -ENOMEM;
1822 }
1823
1824 src = kmap_local_page(page);
1825 zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
1826 kunmap_local(src);
1827
1828 zram_slot_lock(zram, index);
1829 zram_free_page(zram, index);
1830 zram_set_flag(zram, index, ZRAM_HUGE);
1831 zram_set_handle(zram, index, handle);
1832 zram_set_obj_size(zram, index, PAGE_SIZE);
1833 zram_slot_unlock(zram, index);
1834
1835 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
1836 atomic64_inc(&zram->stats.huge_pages);
1837 atomic64_inc(&zram->stats.huge_pages_since);
1838 atomic64_inc(&zram->stats.pages_stored);
1839
1840 return 0;
1841 }
1842
1843 static int zram_write_page(struct zram *zram, struct page *page, u32 index)
1844 {
1845 int ret = 0;
1846 unsigned long handle;
1847 unsigned int comp_len;
1848 void *mem;
1849 struct zcomp_strm *zstrm;
1850 unsigned long element;
1851 bool same_filled;
1852
1853 mem = kmap_local_page(page);
1854 same_filled = page_same_filled(mem, &element);
1855 kunmap_local(mem);
1856 if (same_filled)
1857 return write_same_filled_page(zram, element, index);
1858
1859 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
1860 mem = kmap_local_page(page);
1861 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
1862 mem, &comp_len);
1863 kunmap_local(mem);
1864
1865 if (unlikely(ret)) {
1866 zcomp_stream_put(zstrm);
1867 pr_err("Compression failed! err=%d\n", ret);
1868 return ret;
1869 }
1870
1871 if (comp_len >= huge_class_size) {
1872 zcomp_stream_put(zstrm);
1873 return write_incompressible_page(zram, page, index);
1874 }
1875
1876 handle = zs_malloc(zram->mem_pool, comp_len,
1877 GFP_NOIO | __GFP_NOWARN |
1878 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
1879 if (IS_ERR_VALUE(handle)) {
1880 zcomp_stream_put(zstrm);
1881 return PTR_ERR((void *)handle);
1882 }
1883
1884 if (!zram_can_store_page(zram)) {
1885 zcomp_stream_put(zstrm);
1886 zs_free(zram->mem_pool, handle);
1887 return -ENOMEM;
1888 }
1889
1890 zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
1891 zcomp_stream_put(zstrm);
1892
1893 zram_slot_lock(zram, index);
1894 zram_free_page(zram, index);
1895 zram_set_handle(zram, index, handle);
1896 zram_set_obj_size(zram, index, comp_len);
1897 zram_slot_unlock(zram, index);
1898
1899 /* Update stats */
1900 atomic64_inc(&zram->stats.pages_stored);
1901 atomic64_add(comp_len, &zram->stats.compr_data_size);
1902
1903 return ret;
1904 }
1905
1906 /*
1907 * This is a partial IO. Read the full page before writing the changes.
1908 */
1909 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
1910 u32 index, int offset, struct bio *bio)
1911 {
1912 struct page *page = alloc_page(GFP_NOIO);
1913 int ret;
1914
1915 if (!page)
1916 return -ENOMEM;
1917
1918 ret = zram_read_page(zram, page, index, bio);
1919 if (!ret) {
1920 memcpy_from_bvec(page_address(page) + offset, bvec);
1921 ret = zram_write_page(zram, page, index);
1922 }
1923 __free_page(page);
1924 return ret;
1925 }
1926
1927 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1928 u32 index, int offset, struct bio *bio)
1929 {
1930 if (is_partial_io(bvec))
1931 return zram_bvec_write_partial(zram, bvec, index, offset, bio);
1932 return zram_write_page(zram, bvec->bv_page, index);
1933 }
1934
1935 #ifdef CONFIG_ZRAM_MULTI_COMP
1936 #define RECOMPRESS_IDLE (1 << 0)
1937 #define RECOMPRESS_HUGE (1 << 1)
1938
1939 static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max,
1940 struct zram_pp_ctl *ctl)
1941 {
1942 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
1943 unsigned long index;
1944
1945 for (index = 0; index < nr_pages; index++) {
1946 bool ok = true;
1947
1948 zram_slot_lock(zram, index);
1949 if (!zram_allocated(zram, index))
1950 goto next;
1951
1952 if (mode & RECOMPRESS_IDLE &&
1953 !zram_test_flag(zram, index, ZRAM_IDLE))
1954 goto next;
1955
1956 if (mode & RECOMPRESS_HUGE &&
1957 !zram_test_flag(zram, index, ZRAM_HUGE))
1958 goto next;
1959
1960 if (zram_test_flag(zram, index, ZRAM_WB) ||
1961 zram_test_flag(zram, index, ZRAM_SAME) ||
1962 zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
1963 goto next;
1964
1965 /* Already compressed with the same or higher priority */
1966 if (zram_get_priority(zram, index) + 1 >= prio_max)
1967 goto next;
1968
1969 ok = place_pp_slot(zram, ctl, index);
1970 next:
1971 zram_slot_unlock(zram, index);
1972 if (!ok)
1973 break;
1974 }
1975
1976 return 0;
1977 }
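/*
 * Worked example for the priority filter above: if a slot is already
 * stored with priority 2 and prio_max is 3, then 2 + 1 >= 3 and the
 * slot is skipped -- no strictly higher priority algorithm is left to
 * try within the requested range.
 */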
1978
1979 /*
1980 * This function decompresses the page (unless it's ZRAM_HUGE) and then
1981 * attempts to compress it using the compression algorithm of the provided
1982 * priority (which is potentially more effective).
1983 *
1984 * The corresponding ZRAM slot must be locked by the caller.
1985 */
1986 static int recompress_slot(struct zram *zram, u32 index, struct page *page,
1987 u64 *num_recomp_pages, u32 threshold, u32 prio,
1988 u32 prio_max)
1989 {
1990 struct zcomp_strm *zstrm = NULL;
1991 unsigned long handle_old;
1992 unsigned long handle_new;
1993 unsigned int comp_len_old;
1994 unsigned int comp_len_new;
1995 unsigned int class_index_old;
1996 unsigned int class_index_new;
1997 void *src;
1998 int ret = 0;
1999
2000 handle_old = zram_get_handle(zram, index);
2001 if (!handle_old)
2002 return -EINVAL;
2003
2004 comp_len_old = zram_get_obj_size(zram, index);
2005 /*
2006 * Do not recompress objects that are already "small enough".
2007 */
2008 if (comp_len_old < threshold)
2009 return 0;
2010
2011 ret = zram_read_from_zspool(zram, page, index);
2012 if (ret)
2013 return ret;
2014
2015 /*
2016 * We touched this entry, so mark it as non-IDLE. This makes sure that
2017 * we don't preserve the IDLE flag and don't incorrectly pick this entry
2018 * for a different post-processing type (e.g. writeback).
2019 */
2020 zram_clear_flag(zram, index, ZRAM_IDLE);
2021
2022 class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
2023
2024 prio = max(prio, zram_get_priority(zram, index) + 1);
2025 /*
2026 * The recompression slot scan should not select slots that are
2027 * already compressed with a higher-priority algorithm, but check
2028 * here just in case.
2029 */
2030 if (prio >= prio_max)
2031 return 0;
2032
2033 /*
2034 * Iterate the secondary comp algorithms list (in order of priority)
2035 * and try to recompress the page.
2036 */
2037 for (; prio < prio_max; prio++) {
2038 if (!zram->comps[prio])
2039 continue;
2040
2041 zstrm = zcomp_stream_get(zram->comps[prio]);
2042 src = kmap_local_page(page);
2043 ret = zcomp_compress(zram->comps[prio], zstrm,
2044 src, &comp_len_new);
2045 kunmap_local(src);
2046
2047 if (ret) {
2048 zcomp_stream_put(zstrm);
2049 zstrm = NULL;
2050 break;
2051 }
2052
2053 class_index_new = zs_lookup_class_index(zram->mem_pool,
2054 comp_len_new);
2055
2056 /* Continue until we make progress */
2057 if (class_index_new >= class_index_old ||
2058 (threshold && comp_len_new >= threshold)) {
2059 zcomp_stream_put(zstrm);
2060 zstrm = NULL;
2061 continue;
2062 }
2063
2064 /* Recompression was successful so break out */
2065 break;
2066 }
2067
2068 /*
2069 * Decrement the limit (if set) on pages we can recompress, even
2070 * when current recompression was unsuccessful or did not compress
2071 * the page below the threshold, because we still spent resources
2072 * on it.
2073 */
2074 if (*num_recomp_pages)
2075 *num_recomp_pages -= 1;
2076
2077 /* Compression error */
2078 if (ret)
2079 return ret;
2080
2081 if (!zstrm) {
2082 /*
2083 * Secondary algorithms failed to re-compress the page
2084 * in a way that would save memory.
2085 *
2086 * Mark the object incompressible if the max-priority
2087 * algorithm couldn't re-compress it.
2088 */
2089 if (prio < zram->num_active_comps)
2090 return 0;
2091 zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
2092 return 0;
2093 }
2094
2095 /*
2096 * We are holding a per-CPU stream mutex and the entry lock, so we'd
2097 * better avoid direct reclaim. An allocation error is not fatal since
2098 * we still have the old object in the mem_pool.
2099 *
2100 * XXX: technically, the node we really want here is the node that holds
2101 * the original compressed data. But that would require us to modify the
2102 * zsmalloc API to return this information. For now, we make do with
2103 * the node of the page allocated for recompression.
2104 */
2105 handle_new = zs_malloc(zram->mem_pool, comp_len_new,
2106 GFP_NOIO | __GFP_NOWARN |
2107 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2108 if (IS_ERR_VALUE(handle_new)) {
2109 zcomp_stream_put(zstrm);
2110 return PTR_ERR((void *)handle_new);
2111 }
2112
2113 zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
2114 zcomp_stream_put(zstrm);
2115
2116 zram_free_page(zram, index);
2117 zram_set_handle(zram, index, handle_new);
2118 zram_set_obj_size(zram, index, comp_len_new);
2119 zram_set_priority(zram, index, prio);
2120
2121 atomic64_add(comp_len_new, &zram->stats.compr_data_size);
2122 atomic64_inc(&zram->stats.pages_stored);
2123
2124 return 0;
2125 }
2126
2127 static ssize_t recompress_store(struct device *dev,
2128 struct device_attribute *attr,
2129 const char *buf, size_t len)
2130 {
2131 struct zram *zram = dev_to_zram(dev);
2132 char *args, *param, *val, *algo = NULL;
2133 u64 num_recomp_pages = ULLONG_MAX;
2134 struct zram_pp_ctl *ctl = NULL;
2135 struct zram_pp_slot *pps;
2136 u32 mode = 0, threshold = 0;
2137 u32 prio, prio_max;
2138 struct page *page = NULL;
2139 ssize_t ret;
2140
2141 prio = ZRAM_SECONDARY_COMP;
2142 prio_max = zram->num_active_comps;
2143
2144 args = skip_spaces(buf);
2145 while (*args) {
2146 args = next_arg(args, &param, &val);
2147
2148 if (!val || !*val)
2149 return -EINVAL;
2150
2151 if (!strcmp(param, "type")) {
2152 if (!strcmp(val, "idle"))
2153 mode = RECOMPRESS_IDLE;
2154 if (!strcmp(val, "huge"))
2155 mode = RECOMPRESS_HUGE;
2156 if (!strcmp(val, "huge_idle"))
2157 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
2158 continue;
2159 }
2160
2161 if (!strcmp(param, "max_pages")) {
2162 /*
2163 * Limit the number of entries (pages) we attempt to
2164 * recompress.
2165 */
2166 ret = kstrtoull(val, 10, &num_recomp_pages);
2167 if (ret)
2168 return ret;
2169 continue;
2170 }
2171
2172 if (!strcmp(param, "threshold")) {
2173 /*
2174 * We will re-compress only selected objects equal to or
2175 * greater in size than this watermark.
2176 */
2177 ret = kstrtouint(val, 10, &threshold);
2178 if (ret)
2179 return ret;
2180 continue;
2181 }
2182
2183 if (!strcmp(param, "algo")) {
2184 algo = val;
2185 continue;
2186 }
2187
2188 if (!strcmp(param, "priority")) {
2189 ret = kstrtouint(val, 10, &prio);
2190 if (ret)
2191 return ret;
2192
2193 if (prio == ZRAM_PRIMARY_COMP)
2194 prio = ZRAM_SECONDARY_COMP;
2195
2196 prio_max = prio + 1;
2197 continue;
2198 }
2199 }
2200
2201 if (threshold >= huge_class_size)
2202 return -EINVAL;
2203
2204 down_read(&zram->init_lock);
2205 if (!init_done(zram)) {
2206 ret = -EINVAL;
2207 goto release_init_lock;
2208 }
2209
2210 /* Do not permit concurrent post-processing actions. */
2211 if (atomic_xchg(&zram->pp_in_progress, 1)) {
2212 up_read(&zram->init_lock);
2213 return -EAGAIN;
2214 }
2215
2216 if (algo) {
2217 bool found = false;
2218
2219 for (; prio < ZRAM_MAX_COMPS; prio++) {
2220 if (!zram->comp_algs[prio])
2221 continue;
2222
2223 if (!strcmp(zram->comp_algs[prio], algo)) {
2224 prio_max = prio + 1;
2225 found = true;
2226 break;
2227 }
2228 }
2229
2230 if (!found) {
2231 ret = -EINVAL;
2232 goto release_init_lock;
2233 }
2234 }
2235
2236 prio_max = min(prio_max, (u32)zram->num_active_comps);
2237 if (prio >= prio_max) {
2238 ret = -EINVAL;
2239 goto release_init_lock;
2240 }
2241
2242 page = alloc_page(GFP_KERNEL);
2243 if (!page) {
2244 ret = -ENOMEM;
2245 goto release_init_lock;
2246 }
2247
2248 ctl = init_pp_ctl();
2249 if (!ctl) {
2250 ret = -ENOMEM;
2251 goto release_init_lock;
2252 }
2253
2254 scan_slots_for_recompress(zram, mode, prio_max, ctl);
2255
2256 ret = len;
2257 while ((pps = select_pp_slot(ctl))) {
2258 int err = 0;
2259
2260 if (!num_recomp_pages)
2261 break;
2262
2263 zram_slot_lock(zram, pps->index);
2264 if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
2265 goto next;
2266
2267 err = recompress_slot(zram, pps->index, page,
2268 &num_recomp_pages, threshold,
2269 prio, prio_max);
2270 next:
2271 zram_slot_unlock(zram, pps->index);
2272 release_pp_slot(zram, pps);
2273
2274 if (err) {
2275 ret = err;
2276 break;
2277 }
2278
2279 cond_resched();
2280 }
2281
2282 release_init_lock:
2283 if (page)
2284 __free_page(page);
2285 release_pp_ctl(zram, ctl);
2286 atomic_set(&zram->pp_in_progress, 0);
2287 up_read(&zram->init_lock);
2288 return ret;
2289 }
2290 #endif
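/*
 * A hedged usage sketch for the recompress attribute above (user-space,
 * not part of the driver): it asks the device to recompress idle objects
 * of at least 1000 bytes, assuming CONFIG_ZRAM_MULTI_COMP is enabled, a
 * secondary algorithm has been configured via recomp_algorithm, and the
 * device is zram0.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *cmd = "type=idle threshold=1000";
 *		int fd = open("/sys/block/zram0/recompress", O_WRONLY);
 *
 *		if (fd < 0 || write(fd, cmd, strlen(cmd)) < 0) {
 *			perror("recompress");
 *			return 1;
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */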
2291
2292 static void zram_bio_discard(struct zram *zram, struct bio *bio)
2293 {
2294 size_t n = bio->bi_iter.bi_size;
2295 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2296 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2297 SECTOR_SHIFT;
2298
2299 /*
2300 * zram manages data in physical block size units. Because the logical
2301 * block size isn't identical to the physical block size on some
2302 * architectures, we could get a discard request pointing to a specific
2303 * offset within a certain physical block. Although we could handle such
2304 * a request by reading that physical block, decompressing it, partially
2305 * zeroing it, and then re-compressing and re-storing it, that isn't
2306 * reasonable because our intent with a discard request is to save
2307 * memory. So skipping this logical block is appropriate here.
2308 */
2309 if (offset) {
2310 if (n <= (PAGE_SIZE - offset))
2311 return;
2312
2313 n -= (PAGE_SIZE - offset);
2314 index++;
2315 }
2316
2317 while (n >= PAGE_SIZE) {
2318 zram_slot_lock(zram, index);
2319 zram_free_page(zram, index);
2320 zram_slot_unlock(zram, index);
2321 atomic64_inc(&zram->stats.notify_free);
2322 index++;
2323 n -= PAGE_SIZE;
2324 }
2325
2326 bio_endio(bio);
2327 }
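/*
 * Worked example for the index/offset math above, assuming PAGE_SIZE is
 * 4096 and SECTOR_SHIFT is 9 (so SECTORS_PER_PAGE is 8 and
 * SECTORS_PER_PAGE_SHIFT is 3):
 *
 *	bi_sector = 19, bi_size = 8192
 *	index  = 19 >> 3       = 2
 *	offset = (19 & 7) << 9 = 1536
 *
 * The partial head (the last 2560 bytes of slot 2) is skipped, n becomes
 * 8192 - 2560 = 5632 and index becomes 3. Only slot 3 is freed; the
 * remaining 1536 bytes form a partial tail in slot 4 and are skipped too.
 */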
2328
2329 static void zram_bio_read(struct zram *zram, struct bio *bio)
2330 {
2331 unsigned long start_time = bio_start_io_acct(bio);
2332 struct bvec_iter iter = bio->bi_iter;
2333
2334 do {
2335 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2336 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2337 SECTOR_SHIFT;
2338 struct bio_vec bv = bio_iter_iovec(bio, iter);
2339
2340 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2341
2342 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
2343 atomic64_inc(&zram->stats.failed_reads);
2344 bio->bi_status = BLK_STS_IOERR;
2345 break;
2346 }
2347 flush_dcache_page(bv.bv_page);
2348
2349 zram_slot_lock(zram, index);
2350 zram_accessed(zram, index);
2351 zram_slot_unlock(zram, index);
2352
2353 bio_advance_iter_single(bio, &iter, bv.bv_len);
2354 } while (iter.bi_size);
2355
2356 bio_end_io_acct(bio, start_time);
2357 bio_endio(bio);
2358 }
2359
2360 static void zram_bio_write(struct zram *zram, struct bio *bio)
2361 {
2362 unsigned long start_time = bio_start_io_acct(bio);
2363 struct bvec_iter iter = bio->bi_iter;
2364
2365 do {
2366 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2367 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2368 SECTOR_SHIFT;
2369 struct bio_vec bv = bio_iter_iovec(bio, iter);
2370
2371 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2372
2373 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
2374 atomic64_inc(&zram->stats.failed_writes);
2375 bio->bi_status = BLK_STS_IOERR;
2376 break;
2377 }
2378
2379 zram_slot_lock(zram, index);
2380 zram_accessed(zram, index);
2381 zram_slot_unlock(zram, index);
2382
2383 bio_advance_iter_single(bio, &iter, bv.bv_len);
2384 } while (iter.bi_size);
2385
2386 bio_end_io_acct(bio, start_time);
2387 bio_endio(bio);
2388 }
2389
2390 /*
2391 * Handler function for all zram I/O requests.
2392 */
2393 static void zram_submit_bio(struct bio *bio)
2394 {
2395 struct zram *zram = bio->bi_bdev->bd_disk->private_data;
2396
2397 switch (bio_op(bio)) {
2398 case REQ_OP_READ:
2399 zram_bio_read(zram, bio);
2400 break;
2401 case REQ_OP_WRITE:
2402 zram_bio_write(zram, bio);
2403 break;
2404 case REQ_OP_DISCARD:
2405 case REQ_OP_WRITE_ZEROES:
2406 zram_bio_discard(zram, bio);
2407 break;
2408 default:
2409 WARN_ON_ONCE(1);
2410 bio_endio(bio);
2411 }
2412 }
2413
2414 static void zram_slot_free_notify(struct block_device *bdev,
2415 unsigned long index)
2416 {
2417 struct zram *zram;
2418
2419 zram = bdev->bd_disk->private_data;
2420
2421 atomic64_inc(&zram->stats.notify_free);
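/*
 * This callback can run in contexts where sleeping on the slot lock is
 * not allowed, so use the trylock variant; on contention, account a
 * miss and leave the slot to be freed by a later overwrite or reset.
 */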
2422 if (!zram_slot_trylock(zram, index)) {
2423 atomic64_inc(&zram->stats.miss_free);
2424 return;
2425 }
2426
2427 zram_free_page(zram, index);
2428 zram_slot_unlock(zram, index);
2429 }
2430
2431 static void zram_comp_params_reset(struct zram *zram)
2432 {
2433 u32 prio;
2434
2435 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2436 comp_params_reset(zram, prio);
2437 }
2438 }
2439
2440 static void zram_destroy_comps(struct zram *zram)
2441 {
2442 u32 prio;
2443
2444 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2445 struct zcomp *comp = zram->comps[prio];
2446
2447 zram->comps[prio] = NULL;
2448 if (!comp)
2449 continue;
2450 zcomp_destroy(comp);
2451 zram->num_active_comps--;
2452 }
2453
2454 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2455 /* Do not free statically defined compression algorithms */
2456 if (zram->comp_algs[prio] != default_compressor)
2457 kfree(zram->comp_algs[prio]);
2458 zram->comp_algs[prio] = NULL;
2459 }
2460
2461 zram_comp_params_reset(zram);
2462 }
2463
2464 static void zram_reset_device(struct zram *zram)
2465 {
2466 down_write(&zram->init_lock);
2467
2468 zram->limit_pages = 0;
2469
2470 set_capacity_and_notify(zram->disk, 0);
2471 part_stat_set_all(zram->disk->part0, 0);
2472
2473 /* All in-flight I/O on all CPUs has completed, so it is safe to free */
2474 zram_meta_free(zram, zram->disksize);
2475 zram->disksize = 0;
2476 zram_destroy_comps(zram);
2477 memset(&zram->stats, 0, sizeof(zram->stats));
2478 atomic_set(&zram->pp_in_progress, 0);
2479 reset_bdev(zram);
2480
2481 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2482 up_write(&zram->init_lock);
2483 }
2484
2485 static ssize_t disksize_store(struct device *dev,
2486 struct device_attribute *attr, const char *buf, size_t len)
2487 {
2488 u64 disksize;
2489 struct zcomp *comp;
2490 struct zram *zram = dev_to_zram(dev);
2491 int err;
2492 u32 prio;
2493
2494 disksize = memparse(buf, NULL);
2495 if (!disksize)
2496 return -EINVAL;
2497
2498 down_write(&zram->init_lock);
2499 if (init_done(zram)) {
2500 pr_info("Cannot change disksize for initialized device\n");
2501 err = -EBUSY;
2502 goto out_unlock;
2503 }
2504
2505 disksize = PAGE_ALIGN(disksize);
2506 if (!zram_meta_alloc(zram, disksize)) {
2507 err = -ENOMEM;
2508 goto out_unlock;
2509 }
2510
2511 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2512 if (!zram->comp_algs[prio])
2513 continue;
2514
2515 comp = zcomp_create(zram->comp_algs[prio],
2516 &zram->params[prio]);
2517 if (IS_ERR(comp)) {
2518 pr_err("Cannot initialise %s compressing backend\n",
2519 zram->comp_algs[prio]);
2520 err = PTR_ERR(comp);
2521 goto out_free_comps;
2522 }
2523
2524 zram->comps[prio] = comp;
2525 zram->num_active_comps++;
2526 }
2527 zram->disksize = disksize;
2528 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
2529 up_write(&zram->init_lock);
2530
2531 return len;
2532
2533 out_free_comps:
2534 zram_destroy_comps(zram);
2535 zram_meta_free(zram, disksize);
2536 out_unlock:
2537 up_write(&zram->init_lock);
2538 return err;
2539 }
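/*
 * A hedged usage sketch for disksize_store() (user-space, not part of the
 * driver): it configures a 1 GiB device. memparse() accepts size suffixes
 * such as K, M and G; the device name zram0 is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int sysfs_write(const char *path, const char *val)
 *	{
 *		int fd = open(path, O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, val, strlen(val)) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 *
 *	int main(void)
 *	{
 *		return sysfs_write("/sys/block/zram0/disksize", "1G") ? 1 : 0;
 *	}
 */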
2540
2541 static ssize_t reset_store(struct device *dev,
2542 struct device_attribute *attr, const char *buf, size_t len)
2543 {
2544 int ret;
2545 unsigned short do_reset;
2546 struct zram *zram;
2547 struct gendisk *disk;
2548
2549 ret = kstrtou16(buf, 10, &do_reset);
2550 if (ret)
2551 return ret;
2552
2553 if (!do_reset)
2554 return -EINVAL;
2555
2556 zram = dev_to_zram(dev);
2557 disk = zram->disk;
2558
2559 mutex_lock(&disk->open_mutex);
2560 /* Do not reset an active device or claimed device */
2561 if (disk_openers(disk) || zram->claim) {
2562 mutex_unlock(&disk->open_mutex);
2563 return -EBUSY;
2564 }
2565
2566 /* From now on, no one can open /dev/zram[0-9] */
2567 zram->claim = true;
2568 mutex_unlock(&disk->open_mutex);
2569
2570 /* Make sure all pending I/O is finished */
2571 sync_blockdev(disk->part0);
2572 zram_reset_device(zram);
2573
2574 mutex_lock(&disk->open_mutex);
2575 zram->claim = false;
2576 mutex_unlock(&disk->open_mutex);
2577
2578 return len;
2579 }
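/*
 * Usage note for reset_store() above: writing any non-zero value (e.g.
 * "1") to /sys/block/zram<id>/reset returns the device to its initial,
 * unconfigured state; it fails with -EBUSY while the device is still
 * open (for instance, while it is an active swap device).
 */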
2580
2581 static int zram_open(struct gendisk *disk, blk_mode_t mode)
2582 {
2583 struct zram *zram = disk->private_data;
2584
2585 WARN_ON(!mutex_is_locked(&disk->open_mutex));
2586
2587 /* zram was claimed for reset, so the open request fails */
2588 if (zram->claim)
2589 return -EBUSY;
2590 return 0;
2591 }
2592
2593 static const struct block_device_operations zram_devops = {
2594 .open = zram_open,
2595 .submit_bio = zram_submit_bio,
2596 .swap_slot_free_notify = zram_slot_free_notify,
2597 .owner = THIS_MODULE
2598 };
2599
2600 static DEVICE_ATTR_WO(compact);
2601 static DEVICE_ATTR_RW(disksize);
2602 static DEVICE_ATTR_RO(initstate);
2603 static DEVICE_ATTR_WO(reset);
2604 static DEVICE_ATTR_WO(mem_limit);
2605 static DEVICE_ATTR_WO(mem_used_max);
2606 static DEVICE_ATTR_WO(idle);
2607 static DEVICE_ATTR_RW(comp_algorithm);
2608 #ifdef CONFIG_ZRAM_WRITEBACK
2609 static DEVICE_ATTR_RW(backing_dev);
2610 static DEVICE_ATTR_WO(writeback);
2611 static DEVICE_ATTR_RW(writeback_limit);
2612 static DEVICE_ATTR_RW(writeback_limit_enable);
2613 #endif
2614 #ifdef CONFIG_ZRAM_MULTI_COMP
2615 static DEVICE_ATTR_RW(recomp_algorithm);
2616 static DEVICE_ATTR_WO(recompress);
2617 #endif
2618 static DEVICE_ATTR_WO(algorithm_params);
2619
2620 static struct attribute *zram_disk_attrs[] = {
2621 &dev_attr_disksize.attr,
2622 &dev_attr_initstate.attr,
2623 &dev_attr_reset.attr,
2624 &dev_attr_compact.attr,
2625 &dev_attr_mem_limit.attr,
2626 &dev_attr_mem_used_max.attr,
2627 &dev_attr_idle.attr,
2628 &dev_attr_comp_algorithm.attr,
2629 #ifdef CONFIG_ZRAM_WRITEBACK
2630 &dev_attr_backing_dev.attr,
2631 &dev_attr_writeback.attr,
2632 &dev_attr_writeback_limit.attr,
2633 &dev_attr_writeback_limit_enable.attr,
2634 #endif
2635 &dev_attr_io_stat.attr,
2636 &dev_attr_mm_stat.attr,
2637 #ifdef CONFIG_ZRAM_WRITEBACK
2638 &dev_attr_bd_stat.attr,
2639 #endif
2640 &dev_attr_debug_stat.attr,
2641 #ifdef CONFIG_ZRAM_MULTI_COMP
2642 &dev_attr_recomp_algorithm.attr,
2643 &dev_attr_recompress.attr,
2644 #endif
2645 &dev_attr_algorithm_params.attr,
2646 NULL,
2647 };
2648
2649 ATTRIBUTE_GROUPS(zram_disk);
2650
2651 /*
2652 * Allocate and initialize a new zram device. The function returns
2653 * a device_id ('>= 0') upon success, and a negative value otherwise.
2654 */
2655 static int zram_add(void)
2656 {
2657 struct queue_limits lim = {
2658 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
2659 /*
2660 * Ensure that we always get PAGE_SIZE-aligned and
2661 * n*PAGE_SIZE-sized I/O requests.
2662 */
2663 .physical_block_size = PAGE_SIZE,
2664 .io_min = PAGE_SIZE,
2665 .io_opt = PAGE_SIZE,
2666 .max_hw_discard_sectors = UINT_MAX,
2667 /*
2668 * zram_bio_discard() will clear all logical blocks if the logical
2669 * block size is identical to the physical block size (PAGE_SIZE).
2670 * But if they differ, we skip discarding the parts of logical
2671 * blocks in the request range which aren't aligned to the physical
2672 * block size, so we can't ensure that all discarded logical blocks
2673 * are zeroed.
2674 */
2675 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
2676 .max_write_zeroes_sectors = UINT_MAX,
2677 #endif
2678 .features = BLK_FEAT_STABLE_WRITES |
2679 BLK_FEAT_SYNCHRONOUS,
2680 };
2681 struct zram *zram;
2682 int ret, device_id;
2683
2684 zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
2685 if (!zram)
2686 return -ENOMEM;
2687
2688 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
2689 if (ret < 0)
2690 goto out_free_dev;
2691 device_id = ret;
2692
2693 init_rwsem(&zram->init_lock);
2694 #ifdef CONFIG_ZRAM_WRITEBACK
2695 spin_lock_init(&zram->wb_limit_lock);
2696 #endif
2697
2698 /* gendisk structure */
2699 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
2700 if (IS_ERR(zram->disk)) {
2701 pr_err("Error allocating disk structure for device %d\n",
2702 device_id);
2703 ret = PTR_ERR(zram->disk);
2704 goto out_free_idr;
2705 }
2706
2707 zram->disk->major = zram_major;
2708 zram->disk->first_minor = device_id;
2709 zram->disk->minors = 1;
2710 zram->disk->flags |= GENHD_FL_NO_PART;
2711 zram->disk->fops = &zram_devops;
2712 zram->disk->private_data = zram;
2713 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
2714 atomic_set(&zram->pp_in_progress, 0);
2715 zram_comp_params_reset(zram);
2716 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2717
2718 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
2719 set_capacity(zram->disk, 0);
2720 ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
2721 if (ret)
2722 goto out_cleanup_disk;
2723
2724 zram_debugfs_register(zram);
2725 pr_info("Added device: %s\n", zram->disk->disk_name);
2726 return device_id;
2727
2728 out_cleanup_disk:
2729 put_disk(zram->disk);
2730 out_free_idr:
2731 idr_remove(&zram_index_idr, device_id);
2732 out_free_dev:
2733 kfree(zram);
2734 return ret;
2735 }
2736
2737 static int zram_remove(struct zram *zram)
2738 {
2739 bool claimed;
2740
2741 mutex_lock(&zram->disk->open_mutex);
2742 if (disk_openers(zram->disk)) {
2743 mutex_unlock(&zram->disk->open_mutex);
2744 return -EBUSY;
2745 }
2746
2747 claimed = zram->claim;
2748 if (!claimed)
2749 zram->claim = true;
2750 mutex_unlock(&zram->disk->open_mutex);
2751
2752 zram_debugfs_unregister(zram);
2753
2754 if (claimed) {
2755 /*
2756 * If we were claimed by reset_store(), del_gendisk() will
2757 * wait until reset_store() is done, so there is nothing to do here.
2758 */
2759 ;
2760 } else {
2761 /* Make sure all pending I/O is finished */
2762 sync_blockdev(zram->disk->part0);
2763 zram_reset_device(zram);
2764 }
2765
2766 pr_info("Removed device: %s\n", zram->disk->disk_name);
2767
2768 del_gendisk(zram->disk);
2769
2770 /* del_gendisk drains pending reset_store */
2771 WARN_ON_ONCE(claimed && zram->claim);
2772
2773 /*
2774 * disksize_store() may be called in between zram_reset_device()
2775 * and del_gendisk(), so run one last reset to avoid leaking
2776 * anything allocated by disksize_store().
2777 */
2778 zram_reset_device(zram);
2779
2780 put_disk(zram->disk);
2781 kfree(zram);
2782 return 0;
2783 }
2784
2785 /* zram-control sysfs attributes */
2786
2787 /*
2788 * NOTE: hot_add is not the usual read-only sysfs attribute, in the sense
2789 * that reading from this file does alter the state of your system -- it
2790 * creates a new uninitialized zram device and returns that device's
2791 * device_id (or an error code if it fails to create a new device).
2792 */
2793 static ssize_t hot_add_show(const struct class *class,
2794 const struct class_attribute *attr,
2795 char *buf)
2796 {
2797 int ret;
2798
2799 mutex_lock(&zram_index_mutex);
2800 ret = zram_add();
2801 mutex_unlock(&zram_index_mutex);
2802
2803 if (ret < 0)
2804 return ret;
2805 return sysfs_emit(buf, "%d\n", ret);
2806 }
2807 /* This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used */
2808 static struct class_attribute class_attr_hot_add =
2809 __ATTR(hot_add, 0400, hot_add_show, NULL);
2810
2811 static ssize_t hot_remove_store(const struct class *class,
2812 const struct class_attribute *attr,
2813 const char *buf,
2814 size_t count)
2815 {
2816 struct zram *zram;
2817 int ret, dev_id;
2818
2819 /* dev_id is gendisk->first_minor, which is `int' */
2820 ret = kstrtoint(buf, 10, &dev_id);
2821 if (ret)
2822 return ret;
2823 if (dev_id < 0)
2824 return -EINVAL;
2825
2826 mutex_lock(&zram_index_mutex);
2827
2828 zram = idr_find(&zram_index_idr, dev_id);
2829 if (zram) {
2830 ret = zram_remove(zram);
2831 if (!ret)
2832 idr_remove(&zram_index_idr, dev_id);
2833 } else {
2834 ret = -ENODEV;
2835 }
2836
2837 mutex_unlock(&zram_index_mutex);
2838 return ret ? ret : count;
2839 }
2840 static CLASS_ATTR_WO(hot_remove);
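/*
 * A hedged usage sketch for the zram-control attributes above (user-space,
 * not part of the driver): it creates a device via hot_add and removes it
 * again via hot_remove; error handling is intentionally minimal.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char id[16] = { 0 };
 *		int fd = open("/sys/class/zram-control/hot_add", O_RDONLY);
 *
 *		if (fd < 0 || read(fd, id, sizeof(id) - 1) <= 0)
 *			return 1;
 *		close(fd);
 *		printf("created zram%s", id);	// id already ends with '\n'
 *
 *		fd = open("/sys/class/zram-control/hot_remove", O_WRONLY);
 *		if (fd < 0 || write(fd, id, strlen(id)) < 0)
 *			return 1;
 *		close(fd);
 *		return 0;
 *	}
 */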
2841
2842 static struct attribute *zram_control_class_attrs[] = {
2843 &class_attr_hot_add.attr,
2844 &class_attr_hot_remove.attr,
2845 NULL,
2846 };
2847 ATTRIBUTE_GROUPS(zram_control_class);
2848
2849 static struct class zram_control_class = {
2850 .name = "zram-control",
2851 .class_groups = zram_control_class_groups,
2852 };
2853
2854 static int zram_remove_cb(int id, void *ptr, void *data)
2855 {
2856 WARN_ON_ONCE(zram_remove(ptr));
2857 return 0;
2858 }
2859
2860 static void destroy_devices(void)
2861 {
2862 class_unregister(&zram_control_class);
2863 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2864 zram_debugfs_destroy();
2865 idr_destroy(&zram_index_idr);
2866 unregister_blkdev(zram_major, "zram");
2867 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2868 }
2869
2870 static int __init zram_init(void)
2871 {
2872 struct zram_table_entry zram_te;
2873 int ret;
2874
2875 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);
2876
2877 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2878 zcomp_cpu_up_prepare, zcomp_cpu_dead);
2879 if (ret < 0)
2880 return ret;
2881
2882 ret = class_register(&zram_control_class);
2883 if (ret) {
2884 pr_err("Unable to register zram-control class\n");
2885 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2886 return ret;
2887 }
2888
2889 zram_debugfs_create();
2890 zram_major = register_blkdev(0, "zram");
2891 if (zram_major <= 0) {
2892 pr_err("Unable to get major number\n");
2893 class_unregister(&zram_control_class);
2894 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2895 return -EBUSY;
2896 }
2897
2898 while (num_devices != 0) {
2899 mutex_lock(&zram_index_mutex);
2900 ret = zram_add();
2901 mutex_unlock(&zram_index_mutex);
2902 if (ret < 0)
2903 goto out_error;
2904 num_devices--;
2905 }
2906
2907 return 0;
2908
2909 out_error:
2910 destroy_devices();
2911 return ret;
2912 }
2913
2914 static void __exit zram_exit(void)
2915 {
2916 destroy_devices();
2917 }
2918
2919 module_init(zram_init);
2920 module_exit(zram_exit);
2921
2922 module_param(num_devices, uint, 0);
2923 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
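/*
 * Usage note: the number of pre-created devices can be chosen at module
 * load time, e.g. "modprobe zram num_devices=4"; additional devices can
 * still be added later through the zram-control hot_add attribute.
 */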
2924
2925 MODULE_LICENSE("Dual BSD/GPL");
2926 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2927 MODULE_DESCRIPTION("Compressed RAM Block Device");
2928