1 /*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/highmem.h>
26 #include <linux/slab.h>
27 #include <linux/backing-dev.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32 #include <linux/sysfs.h>
33 #include <linux/debugfs.h>
34 #include <linux/cpuhotplug.h>
35 #include <linux/part_stat.h>
36 #include <linux/kernel_read_file.h>
37
38 #include "zram_drv.h"
39
40 static DEFINE_IDR(zram_index_idr);
41 /* idr index must be protected */
42 static DEFINE_MUTEX(zram_index_mutex);
43
44 static int zram_major;
45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
46
47 #define ZRAM_MAX_ALGO_NAME_SZ 128
48
49 /* Module params (documentation at end) */
50 static unsigned int num_devices = 1;
51 /*
52 * Pages that compress to sizes equal to or greater than this are stored
53 * uncompressed in memory.
54 */
55 static size_t huge_class_size;
56
57 static const struct block_device_operations zram_devops;
58
59 static void zram_free_page(struct zram *zram, size_t index);
60 static int zram_read_from_zspool(struct zram *zram, struct page *page,
61 u32 index);
62
63 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)
64
65 static void zram_slot_lock_init(struct zram *zram, u32 index)
66 {
67 static struct lock_class_key __key;
68
69 lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
70 &__key, 0);
71 }
72
73 /*
74 * entry locking rules:
75 *
76 * 1) Lock is exclusive
77 *
78 * 2) lock() function can sleep waiting for the lock
79 *
80 * 3) Lock owner can sleep
81 *
82 * 4) Use the TRY lock variant when in atomic context
83 * - must check the return value and handle locking failures
84 */
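/*
 * Illustrative usage of the entry lock (a sketch, not code from this file):
 *
 *	zram_slot_lock(zram, index);		(sleepable context)
 *	...access zram->table[index]...
 *	zram_slot_unlock(zram, index);
 *
 *	if (zram_slot_trylock(zram, index)) {	(atomic context)
 *		...access zram->table[index]...
 *		zram_slot_unlock(zram, index);
 *	}
 */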
85 static __must_check bool zram_slot_trylock(struct zram *zram, u32 index)
86 {
87 unsigned long *lock = &zram->table[index].flags;
88
89 if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
90 mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
91 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
92 return true;
93 }
94
95 return false;
96 }
97
98 static void zram_slot_lock(struct zram *zram, u32 index)
99 {
100 unsigned long *lock = &zram->table[index].flags;
101
102 mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
103 wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
104 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
105 }
106
107 static void zram_slot_unlock(struct zram *zram, u32 index)
108 {
109 unsigned long *lock = &zram->table[index].flags;
110
111 mutex_release(slot_dep_map(zram, index), _RET_IP_);
112 clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
113 }
114
115 static inline bool init_done(struct zram *zram)
116 {
117 return zram->disksize;
118 }
119
120 static inline struct zram *dev_to_zram(struct device *dev)
121 {
122 return (struct zram *)dev_to_disk(dev)->private_data;
123 }
124
125 static unsigned long zram_get_handle(struct zram *zram, u32 index)
126 {
127 return zram->table[index].handle;
128 }
129
130 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
131 {
132 zram->table[index].handle = handle;
133 }
134
135 static bool zram_test_flag(struct zram *zram, u32 index,
136 enum zram_pageflags flag)
137 {
138 return zram->table[index].flags & BIT(flag);
139 }
140
141 static void zram_set_flag(struct zram *zram, u32 index,
142 enum zram_pageflags flag)
143 {
144 zram->table[index].flags |= BIT(flag);
145 }
146
147 static void zram_clear_flag(struct zram *zram, u32 index,
148 enum zram_pageflags flag)
149 {
150 zram->table[index].flags &= ~BIT(flag);
151 }
152
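/*
 * The low ZRAM_FLAG_SHIFT bits of table[index].flags store the size of the
 * compressed object; the bits above that hold the zram_pageflags (including
 * the compression priority bits and the per-entry lock bit).
 */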
153 static size_t zram_get_obj_size(struct zram *zram, u32 index)
154 {
155 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
156 }
157
158 static void zram_set_obj_size(struct zram *zram,
159 u32 index, size_t size)
160 {
161 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
162
163 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
164 }
165
166 static inline bool zram_allocated(struct zram *zram, u32 index)
167 {
168 return zram_get_obj_size(zram, index) ||
169 zram_test_flag(zram, index, ZRAM_SAME) ||
170 zram_test_flag(zram, index, ZRAM_WB);
171 }
172
173 static inline void update_used_max(struct zram *zram, const unsigned long pages)
174 {
175 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
176
177 do {
178 if (cur_max >= pages)
179 return;
180 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
181 &cur_max, pages));
182 }
183
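/*
 * Returns false once the zsmalloc pool has grown past limit_pages (a limit
 * of 0 means "no limit"); also refreshes the max_used_pages watermark as a
 * side effect.
 */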
184 static bool zram_can_store_page(struct zram *zram)
185 {
186 unsigned long alloced_pages;
187
188 alloced_pages = zs_get_total_pages(zram->mem_pool);
189 update_used_max(zram, alloced_pages);
190
191 return !zram->limit_pages || alloced_pages <= zram->limit_pages;
192 }
193
194 #if PAGE_SIZE != 4096
195 static inline bool is_partial_io(struct bio_vec *bvec)
196 {
197 return bvec->bv_len != PAGE_SIZE;
198 }
199 #define ZRAM_PARTIAL_IO 1
200 #else
201 static inline bool is_partial_io(struct bio_vec *bvec)
202 {
203 return false;
204 }
205 #endif
206
207 static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
208 {
209 prio &= ZRAM_COMP_PRIORITY_MASK;
210 /*
211 * Clear the previous priority value first, in case we recompress
212 * an already recompressed page.
213 */
214 zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
215 ZRAM_COMP_PRIORITY_BIT1);
216 zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
217 }
218
219 static inline u32 zram_get_priority(struct zram *zram, u32 index)
220 {
221 u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
222
223 return prio & ZRAM_COMP_PRIORITY_MASK;
224 }
225
226 static void zram_accessed(struct zram *zram, u32 index)
227 {
228 zram_clear_flag(zram, index, ZRAM_IDLE);
229 zram_clear_flag(zram, index, ZRAM_PP_SLOT);
230 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
231 zram->table[index].ac_time = ktime_get_boottime();
232 #endif
233 }
234
235 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
236 struct zram_pp_slot {
237 unsigned long index;
238 struct list_head entry;
239 };
240
241 /*
242 * A post-processing bucket is, essentially, a size class: this defines
243 * the range (in bytes) of pp-slot sizes in a particular bucket.
244 */
245 #define PP_BUCKET_SIZE_RANGE 64
246 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)
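/*
 * For example, with a 4K PAGE_SIZE and a 64-byte range this yields 65
 * buckets, and a slot whose compressed size is 700 bytes lands in
 * bucket 700 / 64 = 10.
 */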
247
248 struct zram_pp_ctl {
249 struct list_head pp_buckets[NUM_PP_BUCKETS];
250 };
251
252 static struct zram_pp_ctl *init_pp_ctl(void)
253 {
254 struct zram_pp_ctl *ctl;
255 u32 idx;
256
257 ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
258 if (!ctl)
259 return NULL;
260
261 for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
262 INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
263 return ctl;
264 }
265
266 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
267 {
268 list_del_init(&pps->entry);
269
270 zram_slot_lock(zram, pps->index);
271 zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
272 zram_slot_unlock(zram, pps->index);
273
274 kfree(pps);
275 }
276
277 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
278 {
279 u32 idx;
280
281 if (!ctl)
282 return;
283
284 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
285 while (!list_empty(&ctl->pp_buckets[idx])) {
286 struct zram_pp_slot *pps;
287
288 pps = list_first_entry(&ctl->pp_buckets[idx],
289 struct zram_pp_slot,
290 entry);
291 release_pp_slot(zram, pps);
292 }
293 }
294
295 kfree(ctl);
296 }
297
298 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
299 u32 index)
300 {
301 struct zram_pp_slot *pps;
302 u32 bid;
303
304 pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN);
305 if (!pps)
306 return false;
307
308 INIT_LIST_HEAD(&pps->entry);
309 pps->index = index;
310
311 bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
312 list_add(&pps->entry, &ctl->pp_buckets[bid]);
313
314 zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
315 return true;
316 }
317
318 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
319 {
320 struct zram_pp_slot *pps = NULL;
321 s32 idx = NUM_PP_BUCKETS - 1;
322
323 /* The higher the bucket id, the more worthwhile the slot is to post-process */
324 while (idx >= 0) {
325 pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
326 struct zram_pp_slot,
327 entry);
328 if (pps)
329 break;
330
331 idx--;
332 }
333 return pps;
334 }
335 #endif
336
337 static inline void zram_fill_page(void *ptr, unsigned long len,
338 unsigned long value)
339 {
340 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
341 memset_l(ptr, value, len / sizeof(unsigned long));
342 }
343
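/*
 * Returns true if every word in the page holds the same value and, if so,
 * stores that value in *element so the page can be recorded as a ZRAM_SAME
 * pattern instead of being compressed.
 */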
344 static bool page_same_filled(void *ptr, unsigned long *element)
345 {
346 unsigned long *page;
347 unsigned long val;
348 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
349
350 page = (unsigned long *)ptr;
351 val = page[0];
352
353 if (val != page[last_pos])
354 return false;
355
356 for (pos = 1; pos < last_pos; pos++) {
357 if (val != page[pos])
358 return false;
359 }
360
361 *element = val;
362
363 return true;
364 }
365
366 static ssize_t initstate_show(struct device *dev,
367 struct device_attribute *attr, char *buf)
368 {
369 u32 val;
370 struct zram *zram = dev_to_zram(dev);
371
372 down_read(&zram->init_lock);
373 val = init_done(zram);
374 up_read(&zram->init_lock);
375
376 return sysfs_emit(buf, "%u\n", val);
377 }
378
379 static ssize_t disksize_show(struct device *dev,
380 struct device_attribute *attr, char *buf)
381 {
382 struct zram *zram = dev_to_zram(dev);
383
384 return sysfs_emit(buf, "%llu\n", zram->disksize);
385 }
386
387 static ssize_t mem_limit_store(struct device *dev,
388 struct device_attribute *attr, const char *buf, size_t len)
389 {
390 u64 limit;
391 char *tmp;
392 struct zram *zram = dev_to_zram(dev);
393
394 limit = memparse(buf, &tmp);
395 if (buf == tmp) /* no chars parsed, invalid input */
396 return -EINVAL;
397
398 down_write(&zram->init_lock);
399 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
400 up_write(&zram->init_lock);
401
402 return len;
403 }
404
405 static ssize_t mem_used_max_store(struct device *dev,
406 struct device_attribute *attr, const char *buf, size_t len)
407 {
408 int err;
409 unsigned long val;
410 struct zram *zram = dev_to_zram(dev);
411
412 err = kstrtoul(buf, 10, &val);
413 if (err || val != 0)
414 return -EINVAL;
415
416 down_read(&zram->init_lock);
417 if (init_done(zram)) {
418 atomic_long_set(&zram->stats.max_used_pages,
419 zs_get_total_pages(zram->mem_pool));
420 }
421 up_read(&zram->init_lock);
422
423 return len;
424 }
425
426 /*
427 * Mark all pages that are older than or equal to cutoff as IDLE.
428 * Callers should hold the zram init lock in read mode.
429 */
430 static void mark_idle(struct zram *zram, ktime_t cutoff)
431 {
432 int is_idle = 1;
433 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
434 int index;
435
436 for (index = 0; index < nr_pages; index++) {
437 /*
438 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
439 * post-processing (recompress, writeback) happens to the
440 * ZRAM_SAME slot.
441 *
442 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
443 */
444 zram_slot_lock(zram, index);
445 if (!zram_allocated(zram, index) ||
446 zram_test_flag(zram, index, ZRAM_WB) ||
447 zram_test_flag(zram, index, ZRAM_SAME)) {
448 zram_slot_unlock(zram, index);
449 continue;
450 }
451
452 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
453 is_idle = !cutoff ||
454 ktime_after(cutoff, zram->table[index].ac_time);
455 #endif
456 if (is_idle)
457 zram_set_flag(zram, index, ZRAM_IDLE);
458 else
459 zram_clear_flag(zram, index, ZRAM_IDLE);
460 zram_slot_unlock(zram, index);
461 }
462 }
463
464 static ssize_t idle_store(struct device *dev,
465 struct device_attribute *attr, const char *buf, size_t len)
466 {
467 struct zram *zram = dev_to_zram(dev);
468 ktime_t cutoff_time = 0;
469 ssize_t rv = -EINVAL;
470
471 if (!sysfs_streq(buf, "all")) {
472 /*
473 * If it did not parse as 'all', try to treat it as an integer,
474 * provided that memory tracking is enabled.
475 */
476 u64 age_sec;
477
478 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec))
479 cutoff_time = ktime_sub(ktime_get_boottime(),
480 ns_to_ktime(age_sec * NSEC_PER_SEC));
481 else
482 goto out;
483 }
484
485 down_read(&zram->init_lock);
486 if (!init_done(zram))
487 goto out_unlock;
488
489 /*
490 * A cutoff_time of 0 marks everything as idle; this is the
491 * "all" behavior.
492 */
493 mark_idle(zram, cutoff_time);
494 rv = len;
495
496 out_unlock:
497 up_read(&zram->init_lock);
498 out:
499 return rv;
500 }
501
502 #ifdef CONFIG_ZRAM_WRITEBACK
503 #define INVALID_BDEV_BLOCK (~0UL)
504
505 struct zram_wb_ctl {
506 /* the idle list is accessed only by the writeback task, no concurrency */
507 struct list_head idle_reqs;
508 /* the done list is accessed concurrently, protected by done_lock */
509 struct list_head done_reqs;
510 wait_queue_head_t done_wait;
511 spinlock_t done_lock;
512 atomic_t num_inflight;
513 };
514
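/*
 * A single batched writeback request: one reserved backing-device block,
 * the page holding the data to be written and the bio used to write it.
 * Requests cycle between the idle and done lists above.
 */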
515 struct zram_wb_req {
516 unsigned long blk_idx;
517 struct page *page;
518 struct zram_pp_slot *pps;
519 struct bio_vec bio_vec;
520 struct bio bio;
521
522 struct list_head entry;
523 };
524
525 static ssize_t writeback_limit_enable_store(struct device *dev,
526 struct device_attribute *attr,
527 const char *buf, size_t len)
528 {
529 struct zram *zram = dev_to_zram(dev);
530 u64 val;
531 ssize_t ret = -EINVAL;
532
533 if (kstrtoull(buf, 10, &val))
534 return ret;
535
536 down_write(&zram->init_lock);
537 zram->wb_limit_enable = val;
538 up_write(&zram->init_lock);
539 ret = len;
540
541 return ret;
542 }
543
544 static ssize_t writeback_limit_enable_show(struct device *dev,
545 struct device_attribute *attr,
546 char *buf)
547 {
548 bool val;
549 struct zram *zram = dev_to_zram(dev);
550
551 down_read(&zram->init_lock);
552 val = zram->wb_limit_enable;
553 up_read(&zram->init_lock);
554
555 return sysfs_emit(buf, "%d\n", val);
556 }
557
558 static ssize_t writeback_limit_store(struct device *dev,
559 struct device_attribute *attr,
560 const char *buf, size_t len)
561 {
562 struct zram *zram = dev_to_zram(dev);
563 u64 val;
564 ssize_t ret = -EINVAL;
565
566 if (kstrtoull(buf, 10, &val))
567 return ret;
568
569 /*
570 * When the page size is greater than 4KB, a bd_wb_limit value that is
571 * not page-size aligned causes the counter to wrap. For example, with
572 * a 16KB page size and bd_wb_limit set to 3, a single writeback
573 * operation would take bd_wb_limit to -1, and since bd_wb_limit is
574 * unsigned it would wrap around instead. Round the value down to a
575 * whole number of pages (in 4KB units) to avoid this.
576 */
577 val = rounddown(val, PAGE_SIZE / 4096);
578
579 down_write(&zram->init_lock);
580 zram->bd_wb_limit = val;
581 up_write(&zram->init_lock);
582 ret = len;
583
584 return ret;
585 }
586
587 static ssize_t writeback_limit_show(struct device *dev,
588 struct device_attribute *attr, char *buf)
589 {
590 u64 val;
591 struct zram *zram = dev_to_zram(dev);
592
593 down_read(&zram->init_lock);
594 val = zram->bd_wb_limit;
595 up_read(&zram->init_lock);
596
597 return sysfs_emit(buf, "%llu\n", val);
598 }
599
600 static ssize_t writeback_batch_size_store(struct device *dev,
601 struct device_attribute *attr,
602 const char *buf, size_t len)
603 {
604 struct zram *zram = dev_to_zram(dev);
605 u32 val;
606
607 if (kstrtouint(buf, 10, &val))
608 return -EINVAL;
609
610 if (!val)
611 return -EINVAL;
612
613 down_write(&zram->init_lock);
614 zram->wb_batch_size = val;
615 up_write(&zram->init_lock);
616
617 return len;
618 }
619
620 static ssize_t writeback_batch_size_show(struct device *dev,
621 struct device_attribute *attr,
622 char *buf)
623 {
624 u32 val;
625 struct zram *zram = dev_to_zram(dev);
626
627 down_read(&zram->init_lock);
628 val = zram->wb_batch_size;
629 up_read(&zram->init_lock);
630
631 return sysfs_emit(buf, "%u\n", val);
632 }
633
634 static void reset_bdev(struct zram *zram)
635 {
636 if (!zram->backing_dev)
637 return;
638
639 /* hope filp_close flushes all of the IO */
640 filp_close(zram->backing_dev, NULL);
641 zram->backing_dev = NULL;
642 zram->bdev = NULL;
643 zram->disk->fops = &zram_devops;
644 kvfree(zram->bitmap);
645 zram->bitmap = NULL;
646 }
647
648 static ssize_t backing_dev_show(struct device *dev,
649 struct device_attribute *attr, char *buf)
650 {
651 struct file *file;
652 struct zram *zram = dev_to_zram(dev);
653 char *p;
654 ssize_t ret;
655
656 down_read(&zram->init_lock);
657 file = zram->backing_dev;
658 if (!file) {
659 memcpy(buf, "none\n", 5);
660 up_read(&zram->init_lock);
661 return 5;
662 }
663
664 p = file_path(file, buf, PAGE_SIZE - 1);
665 if (IS_ERR(p)) {
666 ret = PTR_ERR(p);
667 goto out;
668 }
669
670 ret = strlen(p);
671 memmove(buf, p, ret);
672 buf[ret++] = '\n';
673 out:
674 up_read(&zram->init_lock);
675 return ret;
676 }
677
678 static ssize_t backing_dev_store(struct device *dev,
679 struct device_attribute *attr, const char *buf, size_t len)
680 {
681 char *file_name;
682 size_t sz;
683 struct file *backing_dev = NULL;
684 struct inode *inode;
685 unsigned int bitmap_sz;
686 unsigned long nr_pages, *bitmap = NULL;
687 int err;
688 struct zram *zram = dev_to_zram(dev);
689
690 file_name = kmalloc(PATH_MAX, GFP_KERNEL);
691 if (!file_name)
692 return -ENOMEM;
693
694 down_write(&zram->init_lock);
695 if (init_done(zram)) {
696 pr_info("Can't setup backing device for initialized device\n");
697 err = -EBUSY;
698 goto out;
699 }
700
701 strscpy(file_name, buf, PATH_MAX);
702 /* ignore trailing newline */
703 sz = strlen(file_name);
704 if (sz > 0 && file_name[sz - 1] == '\n')
705 file_name[sz - 1] = 0x00;
706
707 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
708 if (IS_ERR(backing_dev)) {
709 err = PTR_ERR(backing_dev);
710 backing_dev = NULL;
711 goto out;
712 }
713
714 inode = backing_dev->f_mapping->host;
715
716 /* Only block devices are supported at the moment */
717 if (!S_ISBLK(inode->i_mode)) {
718 err = -ENOTBLK;
719 goto out;
720 }
721
722 nr_pages = i_size_read(inode) >> PAGE_SHIFT;
723 /* Refuse to use a zero-sized device (also prevents self-reference) */
724 if (!nr_pages) {
725 err = -EINVAL;
726 goto out;
727 }
728
729 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
730 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
731 if (!bitmap) {
732 err = -ENOMEM;
733 goto out;
734 }
735
736 reset_bdev(zram);
737
738 zram->bdev = I_BDEV(inode);
739 zram->backing_dev = backing_dev;
740 zram->bitmap = bitmap;
741 zram->nr_pages = nr_pages;
742 up_write(&zram->init_lock);
743
744 pr_info("setup backing device %s\n", file_name);
745 kfree(file_name);
746
747 return len;
748 out:
749 kvfree(bitmap);
750
751 if (backing_dev)
752 filp_close(backing_dev, NULL);
753
754 up_write(&zram->init_lock);
755
756 kfree(file_name);
757
758 return err;
759 }
760
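/*
 * Reserve a free (page-sized) block on the backing device bitmap.
 * Returns INVALID_BDEV_BLOCK when the backing device is full.
 */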
761 static unsigned long zram_reserve_bdev_block(struct zram *zram)
762 {
763 unsigned long blk_idx;
764
765 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0);
766 if (blk_idx == zram->nr_pages)
767 return INVALID_BDEV_BLOCK;
768
769 set_bit(blk_idx, zram->bitmap);
770 atomic64_inc(&zram->stats.bd_count);
771 return blk_idx;
772 }
773
774 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
775 {
776 int was_set;
777
778 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
779 WARN_ON_ONCE(!was_set);
780 atomic64_dec(&zram->stats.bd_count);
781 }
782
783 static void read_from_bdev_async(struct zram *zram, struct page *page,
784 unsigned long entry, struct bio *parent)
785 {
786 struct bio *bio;
787
788 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
789 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
790 __bio_add_page(bio, page, PAGE_SIZE, 0);
791 bio_chain(bio, parent);
792 submit_bio(bio);
793 }
794
795 static void release_wb_req(struct zram_wb_req *req)
796 {
797 __free_page(req->page);
798 kfree(req);
799 }
800
801 static void release_wb_ctl(struct zram_wb_ctl *wb_ctl)
802 {
803 if (!wb_ctl)
804 return;
805
806 /* We should never have inflight requests at this point */
807 WARN_ON(atomic_read(&wb_ctl->num_inflight));
808 WARN_ON(!list_empty(&wb_ctl->done_reqs));
809
810 while (!list_empty(&wb_ctl->idle_reqs)) {
811 struct zram_wb_req *req;
812
813 req = list_first_entry(&wb_ctl->idle_reqs,
814 struct zram_wb_req, entry);
815 list_del(&req->entry);
816 release_wb_req(req);
817 }
818
819 kfree(wb_ctl);
820 }
821
822 static struct zram_wb_ctl *init_wb_ctl(struct zram *zram)
823 {
824 struct zram_wb_ctl *wb_ctl;
825 int i;
826
827 wb_ctl = kmalloc(sizeof(*wb_ctl), GFP_KERNEL);
828 if (!wb_ctl)
829 return NULL;
830
831 INIT_LIST_HEAD(&wb_ctl->idle_reqs);
832 INIT_LIST_HEAD(&wb_ctl->done_reqs);
833 atomic_set(&wb_ctl->num_inflight, 0);
834 init_waitqueue_head(&wb_ctl->done_wait);
835 spin_lock_init(&wb_ctl->done_lock);
836
837 for (i = 0; i < zram->wb_batch_size; i++) {
838 struct zram_wb_req *req;
839
840 /*
841 * This is a fatal condition only if we couldn't allocate
842 * any requests at all. Otherwise we just work with the
843 * requests that we have successfully allocated, so that
844 * writeback can still proceed, even if there is only one
845 * request on the idle list.
846 */
847 req = kzalloc(sizeof(*req), GFP_KERNEL | __GFP_NOWARN);
848 if (!req)
849 break;
850
851 req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN);
852 if (!req->page) {
853 kfree(req);
854 break;
855 }
856
857 list_add(&req->entry, &wb_ctl->idle_reqs);
858 }
859
860 /* We couldn't allocate any requests, so writeback is not possible */
861 if (list_empty(&wb_ctl->idle_reqs))
862 goto release_wb_ctl;
863
864 return wb_ctl;
865
866 release_wb_ctl:
867 release_wb_ctl(wb_ctl);
868 return NULL;
869 }
870
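/*
 * bd_wb_limit is accounted in 4K units, so a single PAGE_SIZE writeback
 * consumes (or, on rollback, returns) 1UL << (PAGE_SHIFT - 12) units.
 */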
871 static void zram_account_writeback_rollback(struct zram *zram)
872 {
873 lockdep_assert_held_read(&zram->init_lock);
874
875 if (zram->wb_limit_enable)
876 zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12);
877 }
878
879 static void zram_account_writeback_submit(struct zram *zram)
880 {
881 lockdep_assert_held_read(&zram->init_lock);
882
883 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
884 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
885 }
886
887 static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
888 {
889 u32 index = req->pps->index;
890 int err;
891
892 err = blk_status_to_errno(req->bio.bi_status);
893 if (err) {
894 /*
895 * Failed wb requests should not be accounted in wb_limit
896 * (if enabled).
897 */
898 zram_account_writeback_rollback(zram);
899 zram_release_bdev_block(zram, req->blk_idx);
900 return err;
901 }
902
903 atomic64_inc(&zram->stats.bd_writes);
904 zram_slot_lock(zram, index);
905 /*
906 * We release the slot lock during writeback, so the slot can change
907 * under us: either slot_free(), or slot_free() followed by
908 * zram_write_page(). In both cases the slot loses its ZRAM_PP_SLOT
909 * flag, and no concurrent post-processing can set ZRAM_PP_SLOT on
910 * such slots until the current post-processing finishes.
911 */
912 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) {
913 zram_release_bdev_block(zram, req->blk_idx);
914 goto out;
915 }
916
917 zram_free_page(zram, index);
918 zram_set_flag(zram, index, ZRAM_WB);
919 zram_set_handle(zram, index, req->blk_idx);
920 atomic64_inc(&zram->stats.pages_stored);
921
922 out:
923 zram_slot_unlock(zram, index);
924 return 0;
925 }
926
927 static void zram_writeback_endio(struct bio *bio)
928 {
929 struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio);
930 struct zram_wb_ctl *wb_ctl = bio->bi_private;
931 unsigned long flags;
932
933 spin_lock_irqsave(&wb_ctl->done_lock, flags);
934 list_add(&req->entry, &wb_ctl->done_reqs);
935 spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
936
937 wake_up(&wb_ctl->done_wait);
938 }
939
940 static void zram_submit_wb_request(struct zram *zram,
941 struct zram_wb_ctl *wb_ctl,
942 struct zram_wb_req *req)
943 {
944 /*
945 * wb_limit (if enabled) should be adjusted before submission,
946 * so that we don't over-submit.
947 */
948 zram_account_writeback_submit(zram);
949 atomic_inc(&wb_ctl->num_inflight);
950 req->bio.bi_private = wb_ctl;
951 submit_bio(&req->bio);
952 }
953
954 static int zram_complete_done_reqs(struct zram *zram,
955 struct zram_wb_ctl *wb_ctl)
956 {
957 struct zram_wb_req *req;
958 unsigned long flags;
959 int ret = 0, err;
960
961 while (atomic_read(&wb_ctl->num_inflight) > 0) {
962 spin_lock_irqsave(&wb_ctl->done_lock, flags);
963 req = list_first_entry_or_null(&wb_ctl->done_reqs,
964 struct zram_wb_req, entry);
965 if (req)
966 list_del(&req->entry);
967 spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
968
969 /* ->num_inflight > 0 doesn't mean any requests are on the done list yet */
970 if (!req)
971 break;
972
973 err = zram_writeback_complete(zram, req);
974 if (err)
975 ret = err;
976
977 atomic_dec(&wb_ctl->num_inflight);
978 release_pp_slot(zram, req->pps);
979 req->pps = NULL;
980
981 list_add(&req->entry, &wb_ctl->idle_reqs);
982 }
983
984 return ret;
985 }
986
987 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl)
988 {
989 struct zram_wb_req *req;
990
991 req = list_first_entry_or_null(&wb_ctl->idle_reqs,
992 struct zram_wb_req, entry);
993 if (req)
994 list_del(&req->entry);
995 return req;
996 }
997
998 static int zram_writeback_slots(struct zram *zram,
999 struct zram_pp_ctl *ctl,
1000 struct zram_wb_ctl *wb_ctl)
1001 {
1002 unsigned long blk_idx = INVALID_BDEV_BLOCK;
1003 struct zram_wb_req *req = NULL;
1004 struct zram_pp_slot *pps;
1005 int ret = 0, err = 0;
1006 u32 index = 0;
1007
1008 while ((pps = select_pp_slot(ctl))) {
1009 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
1010 ret = -EIO;
1011 break;
1012 }
1013
1014 while (!req) {
1015 req = zram_select_idle_req(wb_ctl);
1016 if (req)
1017 break;
1018
1019 wait_event(wb_ctl->done_wait,
1020 !list_empty(&wb_ctl->done_reqs));
1021
1022 err = zram_complete_done_reqs(zram, wb_ctl);
1023 /*
1024 * BIO errors are not fatal; we continue and simply
1025 * attempt to write back the remaining objects (pages).
1026 * At the same time we need to signal user-space that
1027 * some writes (at least one, possibly all of them)
1028 * were not successful, and we do so by returning
1029 * the most recent BIO error.
1030 */
1031 if (err)
1032 ret = err;
1033 }
1034
1035 if (blk_idx == INVALID_BDEV_BLOCK) {
1036 blk_idx = zram_reserve_bdev_block(zram);
1037 if (blk_idx == INVALID_BDEV_BLOCK) {
1038 ret = -ENOSPC;
1039 break;
1040 }
1041 }
1042
1043 index = pps->index;
1044 zram_slot_lock(zram, index);
1045 /*
1046 * scan_slots() sets ZRAM_PP_SLOT and releases the slot lock, so
1047 * slots can change in the meantime. If slots are accessed or
1048 * freed they lose the ZRAM_PP_SLOT flag and hence we don't
1049 * post-process them.
1050 */
1051 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
1052 goto next;
1053 if (zram_read_from_zspool(zram, req->page, index))
1054 goto next;
1055 zram_slot_unlock(zram, index);
1056
1057 /*
1058 * From now on the pp-slot is owned by the req; remove it from
1059 * its pp bucket.
1060 */
1061 list_del_init(&pps->entry);
1062
1063 req->blk_idx = blk_idx;
1064 req->pps = pps;
1065 bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE);
1066 req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
1067 req->bio.bi_end_io = zram_writeback_endio;
1068 __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0);
1069
1070 zram_submit_wb_request(zram, wb_ctl, req);
1071 blk_idx = INVALID_BDEV_BLOCK;
1072 req = NULL;
1073 cond_resched();
1074 continue;
1075
1076 next:
1077 zram_slot_unlock(zram, index);
1078 release_pp_slot(zram, pps);
1079 }
1080
1081 /*
1082 * We selected an idle req but never submitted it, due to an error
1083 * or the wb limit.
1084 */
1085 if (req)
1086 release_wb_req(req);
1087
1088 while (atomic_read(&wb_ctl->num_inflight) > 0) {
1089 wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs));
1090 err = zram_complete_done_reqs(zram, wb_ctl);
1091 if (err)
1092 ret = err;
1093 }
1094
1095 return ret;
1096 }
1097
1098 #define PAGE_WRITEBACK 0
1099 #define HUGE_WRITEBACK (1 << 0)
1100 #define IDLE_WRITEBACK (1 << 1)
1101 #define INCOMPRESSIBLE_WRITEBACK (1 << 2)
1102
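/*
 * page_index=N selects a single page, page_indexes=LO-HI selects an
 * inclusive range; both are converted to a half-open [lo, hi) interval
 * for scan_slots_for_writeback().
 */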
1103 static int parse_page_index(char *val, unsigned long nr_pages,
1104 unsigned long *lo, unsigned long *hi)
1105 {
1106 int ret;
1107
1108 ret = kstrtoul(val, 10, lo);
1109 if (ret)
1110 return ret;
1111 if (*lo >= nr_pages)
1112 return -ERANGE;
1113 *hi = *lo + 1;
1114 return 0;
1115 }
1116
1117 static int parse_page_indexes(char *val, unsigned long nr_pages,
1118 unsigned long *lo, unsigned long *hi)
1119 {
1120 char *delim;
1121 int ret;
1122
1123 delim = strchr(val, '-');
1124 if (!delim)
1125 return -EINVAL;
1126
1127 *delim = 0x00;
1128 ret = kstrtoul(val, 10, lo);
1129 if (ret)
1130 return ret;
1131 if (*lo >= nr_pages)
1132 return -ERANGE;
1133
1134 ret = kstrtoul(delim + 1, 10, hi);
1135 if (ret)
1136 return ret;
1137 if (*hi >= nr_pages || *lo > *hi)
1138 return -ERANGE;
1139 *hi += 1;
1140 return 0;
1141 }
1142
1143 static int parse_mode(char *val, u32 *mode)
1144 {
1145 *mode = 0;
1146
1147 if (!strcmp(val, "idle"))
1148 *mode = IDLE_WRITEBACK;
1149 if (!strcmp(val, "huge"))
1150 *mode = HUGE_WRITEBACK;
1151 if (!strcmp(val, "huge_idle"))
1152 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
1153 if (!strcmp(val, "incompressible"))
1154 *mode = INCOMPRESSIBLE_WRITEBACK;
1155
1156 if (*mode == 0)
1157 return -EINVAL;
1158 return 0;
1159 }
1160
1161 static int scan_slots_for_writeback(struct zram *zram, u32 mode,
1162 unsigned long lo, unsigned long hi,
1163 struct zram_pp_ctl *ctl)
1164 {
1165 u32 index = lo;
1166
1167 while (index < hi) {
1168 bool ok = true;
1169
1170 zram_slot_lock(zram, index);
1171 if (!zram_allocated(zram, index))
1172 goto next;
1173
1174 if (zram_test_flag(zram, index, ZRAM_WB) ||
1175 zram_test_flag(zram, index, ZRAM_SAME))
1176 goto next;
1177
1178 if (mode & IDLE_WRITEBACK &&
1179 !zram_test_flag(zram, index, ZRAM_IDLE))
1180 goto next;
1181 if (mode & HUGE_WRITEBACK &&
1182 !zram_test_flag(zram, index, ZRAM_HUGE))
1183 goto next;
1184 if (mode & INCOMPRESSIBLE_WRITEBACK &&
1185 !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
1186 goto next;
1187
1188 ok = place_pp_slot(zram, ctl, index);
1189 next:
1190 zram_slot_unlock(zram, index);
1191 if (!ok)
1192 break;
1193 index++;
1194 }
1195
1196 return 0;
1197 }
1198
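/*
 * Illustrative invocations (assuming the device is zram0):
 *
 *	echo idle > /sys/block/zram0/writeback
 *	echo type=huge_idle > /sys/block/zram0/writeback
 *	echo page_index=42 > /sys/block/zram0/writeback
 *	echo page_indexes=1-100 > /sys/block/zram0/writeback
 */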
1199 static ssize_t writeback_store(struct device *dev,
1200 struct device_attribute *attr,
1201 const char *buf, size_t len)
1202 {
1203 struct zram *zram = dev_to_zram(dev);
1204 u64 nr_pages = zram->disksize >> PAGE_SHIFT;
1205 unsigned long lo = 0, hi = nr_pages;
1206 struct zram_pp_ctl *pp_ctl = NULL;
1207 struct zram_wb_ctl *wb_ctl = NULL;
1208 char *args, *param, *val;
1209 ssize_t ret = len;
1210 int err, mode = 0;
1211
1212 down_read(&zram->init_lock);
1213 if (!init_done(zram)) {
1214 up_read(&zram->init_lock);
1215 return -EINVAL;
1216 }
1217
1218 /* Do not permit concurrent post-processing actions. */
1219 if (atomic_xchg(&zram->pp_in_progress, 1)) {
1220 up_read(&zram->init_lock);
1221 return -EAGAIN;
1222 }
1223
1224 if (!zram->backing_dev) {
1225 ret = -ENODEV;
1226 goto release_init_lock;
1227 }
1228
1229 pp_ctl = init_pp_ctl();
1230 if (!pp_ctl) {
1231 ret = -ENOMEM;
1232 goto release_init_lock;
1233 }
1234
1235 wb_ctl = init_wb_ctl(zram);
1236 if (!wb_ctl) {
1237 ret = -ENOMEM;
1238 goto release_init_lock;
1239 }
1240
1241 args = skip_spaces(buf);
1242 while (*args) {
1243 args = next_arg(args, ¶m, &val);
1244
1245 /*
1246 * Workaround to support the old writeback interface.
1247 *
1248 * The old writeback interface has a minor inconsistency and
1249 * requires key=value only for page_index parameter, while the
1250 * writeback mode is a valueless parameter.
1251 *
1252 * This is not the case anymore: now all parameters are
1253 * required to have values. However, we need to support the
1254 * legacy writeback interface format, so we check if we can
1255 * recognize a valueless parameter as the (legacy) writeback
1256 * mode.
1257 */
1258 if (!val || !*val) {
1259 err = parse_mode(param, &mode);
1260 if (err) {
1261 ret = err;
1262 goto release_init_lock;
1263 }
1264
1265 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1266 break;
1267 }
1268
1269 if (!strcmp(param, "type")) {
1270 err = parse_mode(val, &mode);
1271 if (err) {
1272 ret = err;
1273 goto release_init_lock;
1274 }
1275
1276 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1277 break;
1278 }
1279
1280 if (!strcmp(param, "page_index")) {
1281 err = parse_page_index(val, nr_pages, &lo, &hi);
1282 if (err) {
1283 ret = err;
1284 goto release_init_lock;
1285 }
1286
1287 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1288 continue;
1289 }
1290
1291 if (!strcmp(param, "page_indexes")) {
1292 err = parse_page_indexes(val, nr_pages, &lo, &hi);
1293 if (err) {
1294 ret = err;
1295 goto release_init_lock;
1296 }
1297
1298 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1299 continue;
1300 }
1301 }
1302
1303 err = zram_writeback_slots(zram, pp_ctl, wb_ctl);
1304 if (err)
1305 ret = err;
1306
1307 release_init_lock:
1308 release_pp_ctl(zram, pp_ctl);
1309 release_wb_ctl(wb_ctl);
1310 atomic_set(&zram->pp_in_progress, 0);
1311 up_read(&zram->init_lock);
1312
1313 return ret;
1314 }
1315
1316 struct zram_work {
1317 struct work_struct work;
1318 struct zram *zram;
1319 unsigned long entry;
1320 struct page *page;
1321 int error;
1322 };
1323
1324 static void zram_sync_read(struct work_struct *work)
1325 {
1326 struct zram_work *zw = container_of(work, struct zram_work, work);
1327 struct bio_vec bv;
1328 struct bio bio;
1329
1330 bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
1331 bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
1332 __bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
1333 zw->error = submit_bio_wait(&bio);
1334 }
1335
1336 /*
1337 * The block layer wants one ->submit_bio to be active at a time, so if we use
1338 * chained IO with the parent IO in the same context, it's a deadlock. To avoid that,
1339 * use a worker thread context.
1340 */
1341 static int read_from_bdev_sync(struct zram *zram, struct page *page,
1342 unsigned long entry)
1343 {
1344 struct zram_work work;
1345
1346 work.page = page;
1347 work.zram = zram;
1348 work.entry = entry;
1349
1350 INIT_WORK_ONSTACK(&work.work, zram_sync_read);
1351 queue_work(system_dfl_wq, &work.work);
1352 flush_work(&work.work);
1353 destroy_work_on_stack(&work.work);
1354
1355 return work.error;
1356 }
1357
1358 static int read_from_bdev(struct zram *zram, struct page *page,
1359 unsigned long entry, struct bio *parent)
1360 {
1361 atomic64_inc(&zram->stats.bd_reads);
1362 if (!parent) {
1363 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
1364 return -EIO;
1365 return read_from_bdev_sync(zram, page, entry);
1366 }
1367 read_from_bdev_async(zram, page, entry, parent);
1368 return 0;
1369 }
1370 #else
1371 static inline void reset_bdev(struct zram *zram) {};
1372 static int read_from_bdev(struct zram *zram, struct page *page,
1373 unsigned long entry, struct bio *parent)
1374 {
1375 return -EIO;
1376 }
1377
1378 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
1379 {
1380 }
1381 #endif
1382
1383 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1384
1385 static struct dentry *zram_debugfs_root;
1386
1387 static void zram_debugfs_create(void)
1388 {
1389 zram_debugfs_root = debugfs_create_dir("zram", NULL);
1390 }
1391
1392 static void zram_debugfs_destroy(void)
1393 {
1394 debugfs_remove_recursive(zram_debugfs_root);
1395 }
1396
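/*
 * Each output line is "<index> <access time> <flags>", where the flag
 * characters are: s (same filled), w (written back), h (huge), i (idle),
 * r (stored by a secondary/recompression algorithm) and n (incompressible).
 */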
1397 static ssize_t read_block_state(struct file *file, char __user *buf,
1398 size_t count, loff_t *ppos)
1399 {
1400 char *kbuf;
1401 ssize_t index, written = 0;
1402 struct zram *zram = file->private_data;
1403 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
1404 struct timespec64 ts;
1405
1406 kbuf = kvmalloc(count, GFP_KERNEL);
1407 if (!kbuf)
1408 return -ENOMEM;
1409
1410 down_read(&zram->init_lock);
1411 if (!init_done(zram)) {
1412 up_read(&zram->init_lock);
1413 kvfree(kbuf);
1414 return -EINVAL;
1415 }
1416
1417 for (index = *ppos; index < nr_pages; index++) {
1418 int copied;
1419
1420 zram_slot_lock(zram, index);
1421 if (!zram_allocated(zram, index))
1422 goto next;
1423
1424 ts = ktime_to_timespec64(zram->table[index].ac_time);
1425 copied = snprintf(kbuf + written, count,
1426 "%12zd %12lld.%06lu %c%c%c%c%c%c\n",
1427 index, (s64)ts.tv_sec,
1428 ts.tv_nsec / NSEC_PER_USEC,
1429 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
1430 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
1431 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
1432 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
1433 zram_get_priority(zram, index) ? 'r' : '.',
1434 zram_test_flag(zram, index,
1435 ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
1436
1437 if (count <= copied) {
1438 zram_slot_unlock(zram, index);
1439 break;
1440 }
1441 written += copied;
1442 count -= copied;
1443 next:
1444 zram_slot_unlock(zram, index);
1445 *ppos += 1;
1446 }
1447
1448 up_read(&zram->init_lock);
1449 if (copy_to_user(buf, kbuf, written))
1450 written = -EFAULT;
1451 kvfree(kbuf);
1452
1453 return written;
1454 }
1455
1456 static const struct file_operations proc_zram_block_state_op = {
1457 .open = simple_open,
1458 .read = read_block_state,
1459 .llseek = default_llseek,
1460 };
1461
1462 static void zram_debugfs_register(struct zram *zram)
1463 {
1464 if (!zram_debugfs_root)
1465 return;
1466
1467 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
1468 zram_debugfs_root);
1469 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
1470 zram, &proc_zram_block_state_op);
1471 }
1472
1473 static void zram_debugfs_unregister(struct zram *zram)
1474 {
1475 debugfs_remove_recursive(zram->debugfs_dir);
1476 }
1477 #else
1478 static void zram_debugfs_create(void) {};
1479 static void zram_debugfs_destroy(void) {};
1480 static void zram_debugfs_register(struct zram *zram) {};
1481 static void zram_debugfs_unregister(struct zram *zram) {};
1482 #endif
1483
1484 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
1485 {
1486 /* Do not free statically defined compression algorithms */
1487 if (zram->comp_algs[prio] != default_compressor)
1488 kfree(zram->comp_algs[prio]);
1489
1490 zram->comp_algs[prio] = alg;
1491 }
1492
1493 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
1494 {
1495 char *compressor;
1496 size_t sz;
1497
1498 sz = strlen(buf);
1499 if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
1500 return -E2BIG;
1501
1502 compressor = kstrdup(buf, GFP_KERNEL);
1503 if (!compressor)
1504 return -ENOMEM;
1505
1506 /* ignore trailing newline */
1507 if (sz > 0 && compressor[sz - 1] == '\n')
1508 compressor[sz - 1] = 0x00;
1509
1510 if (!zcomp_available_algorithm(compressor)) {
1511 kfree(compressor);
1512 return -EINVAL;
1513 }
1514
1515 down_write(&zram->init_lock);
1516 if (init_done(zram)) {
1517 up_write(&zram->init_lock);
1518 kfree(compressor);
1519 pr_info("Can't change algorithm for initialized device\n");
1520 return -EBUSY;
1521 }
1522
1523 comp_algorithm_set(zram, prio, compressor);
1524 up_write(&zram->init_lock);
1525 return 0;
1526 }
1527
1528 static void comp_params_reset(struct zram *zram, u32 prio)
1529 {
1530 struct zcomp_params *params = &zram->params[prio];
1531
1532 vfree(params->dict);
1533 params->level = ZCOMP_PARAM_NOT_SET;
1534 params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
1535 params->dict_sz = 0;
1536 params->dict = NULL;
1537 }
1538
1539 static int comp_params_store(struct zram *zram, u32 prio, s32 level,
1540 const char *dict_path,
1541 struct deflate_params *deflate_params)
1542 {
1543 ssize_t sz = 0;
1544
1545 comp_params_reset(zram, prio);
1546
1547 if (dict_path) {
1548 sz = kernel_read_file_from_path(dict_path, 0,
1549 &zram->params[prio].dict,
1550 INT_MAX,
1551 NULL,
1552 READING_POLICY);
1553 if (sz < 0)
1554 return -EINVAL;
1555 }
1556
1557 zram->params[prio].dict_sz = sz;
1558 zram->params[prio].level = level;
1559 zram->params[prio].deflate.winbits = deflate_params->winbits;
1560 return 0;
1561 }
1562
1563 static ssize_t algorithm_params_store(struct device *dev,
1564 struct device_attribute *attr,
1565 const char *buf,
1566 size_t len)
1567 {
1568 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
1569 char *args, *param, *val, *algo = NULL, *dict_path = NULL;
1570 struct deflate_params deflate_params;
1571 struct zram *zram = dev_to_zram(dev);
1572 int ret;
1573
1574 deflate_params.winbits = ZCOMP_PARAM_NOT_SET;
1575
1576 args = skip_spaces(buf);
1577 while (*args) {
1578 args = next_arg(args, ¶m, &val);
1579
1580 if (!val || !*val)
1581 return -EINVAL;
1582
1583 if (!strcmp(param, "priority")) {
1584 ret = kstrtoint(val, 10, &prio);
1585 if (ret)
1586 return ret;
1587 continue;
1588 }
1589
1590 if (!strcmp(param, "level")) {
1591 ret = kstrtoint(val, 10, &level);
1592 if (ret)
1593 return ret;
1594 continue;
1595 }
1596
1597 if (!strcmp(param, "algo")) {
1598 algo = val;
1599 continue;
1600 }
1601
1602 if (!strcmp(param, "dict")) {
1603 dict_path = val;
1604 continue;
1605 }
1606
1607 if (!strcmp(param, "deflate.winbits")) {
1608 ret = kstrtoint(val, 10, &deflate_params.winbits);
1609 if (ret)
1610 return ret;
1611 continue;
1612 }
1613 }
1614
1615 /* Lookup priority by algorithm name */
1616 if (algo) {
1617 s32 p;
1618
1619 prio = -EINVAL;
1620 for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) {
1621 if (!zram->comp_algs[p])
1622 continue;
1623
1624 if (!strcmp(zram->comp_algs[p], algo)) {
1625 prio = p;
1626 break;
1627 }
1628 }
1629 }
1630
1631 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
1632 return -EINVAL;
1633
1634 ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
1635 return ret ? ret : len;
1636 }
1637
1638 static ssize_t comp_algorithm_show(struct device *dev,
1639 struct device_attribute *attr,
1640 char *buf)
1641 {
1642 struct zram *zram = dev_to_zram(dev);
1643 ssize_t sz;
1644
1645 down_read(&zram->init_lock);
1646 sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
1647 up_read(&zram->init_lock);
1648 return sz;
1649 }
1650
1651 static ssize_t comp_algorithm_store(struct device *dev,
1652 struct device_attribute *attr,
1653 const char *buf,
1654 size_t len)
1655 {
1656 struct zram *zram = dev_to_zram(dev);
1657 int ret;
1658
1659 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
1660 return ret ? ret : len;
1661 }
1662
1663 #ifdef CONFIG_ZRAM_MULTI_COMP
1664 static ssize_t recomp_algorithm_show(struct device *dev,
1665 struct device_attribute *attr,
1666 char *buf)
1667 {
1668 struct zram *zram = dev_to_zram(dev);
1669 ssize_t sz = 0;
1670 u32 prio;
1671
1672 down_read(&zram->init_lock);
1673 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
1674 if (!zram->comp_algs[prio])
1675 continue;
1676
1677 sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
1678 sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
1679 }
1680 up_read(&zram->init_lock);
1681 return sz;
1682 }
1683
1684 static ssize_t recomp_algorithm_store(struct device *dev,
1685 struct device_attribute *attr,
1686 const char *buf,
1687 size_t len)
1688 {
1689 struct zram *zram = dev_to_zram(dev);
1690 int prio = ZRAM_SECONDARY_COMP;
1691 char *args, *param, *val;
1692 char *alg = NULL;
1693 int ret;
1694
1695 args = skip_spaces(buf);
1696 while (*args) {
1697 args = next_arg(args, ¶m, &val);
1698
1699 if (!val || !*val)
1700 return -EINVAL;
1701
1702 if (!strcmp(param, "algo")) {
1703 alg = val;
1704 continue;
1705 }
1706
1707 if (!strcmp(param, "priority")) {
1708 ret = kstrtoint(val, 10, &prio);
1709 if (ret)
1710 return ret;
1711 continue;
1712 }
1713 }
1714
1715 if (!alg)
1716 return -EINVAL;
1717
1718 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
1719 return -EINVAL;
1720
1721 ret = __comp_algorithm_store(zram, prio, alg);
1722 return ret ? ret : len;
1723 }
1724 #endif
1725
1726 static ssize_t compact_store(struct device *dev,
1727 struct device_attribute *attr, const char *buf, size_t len)
1728 {
1729 struct zram *zram = dev_to_zram(dev);
1730
1731 down_read(&zram->init_lock);
1732 if (!init_done(zram)) {
1733 up_read(&zram->init_lock);
1734 return -EINVAL;
1735 }
1736
1737 zs_compact(zram->mem_pool);
1738 up_read(&zram->init_lock);
1739
1740 return len;
1741 }
1742
1743 static ssize_t io_stat_show(struct device *dev,
1744 struct device_attribute *attr, char *buf)
1745 {
1746 struct zram *zram = dev_to_zram(dev);
1747 ssize_t ret;
1748
1749 down_read(&zram->init_lock);
1750 ret = sysfs_emit(buf,
1751 "%8llu %8llu 0 %8llu\n",
1752 (u64)atomic64_read(&zram->stats.failed_reads),
1753 (u64)atomic64_read(&zram->stats.failed_writes),
1754 (u64)atomic64_read(&zram->stats.notify_free));
1755 up_read(&zram->init_lock);
1756
1757 return ret;
1758 }
1759
1760 static ssize_t mm_stat_show(struct device *dev,
1761 struct device_attribute *attr, char *buf)
1762 {
1763 struct zram *zram = dev_to_zram(dev);
1764 struct zs_pool_stats pool_stats;
1765 u64 orig_size, mem_used = 0;
1766 long max_used;
1767 ssize_t ret;
1768
1769 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1770
1771 down_read(&zram->init_lock);
1772 if (init_done(zram)) {
1773 mem_used = zs_get_total_pages(zram->mem_pool);
1774 zs_pool_stats(zram->mem_pool, &pool_stats);
1775 }
1776
1777 orig_size = atomic64_read(&zram->stats.pages_stored);
1778 max_used = atomic_long_read(&zram->stats.max_used_pages);
1779
1780 ret = sysfs_emit(buf,
1781 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1782 orig_size << PAGE_SHIFT,
1783 (u64)atomic64_read(&zram->stats.compr_data_size),
1784 mem_used << PAGE_SHIFT,
1785 zram->limit_pages << PAGE_SHIFT,
1786 max_used << PAGE_SHIFT,
1787 (u64)atomic64_read(&zram->stats.same_pages),
1788 atomic_long_read(&pool_stats.pages_compacted),
1789 (u64)atomic64_read(&zram->stats.huge_pages),
1790 (u64)atomic64_read(&zram->stats.huge_pages_since));
1791 up_read(&zram->init_lock);
1792
1793 return ret;
1794 }
1795
1796 #ifdef CONFIG_ZRAM_WRITEBACK
1797 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1798 static ssize_t bd_stat_show(struct device *dev,
1799 struct device_attribute *attr, char *buf)
1800 {
1801 struct zram *zram = dev_to_zram(dev);
1802 ssize_t ret;
1803
1804 down_read(&zram->init_lock);
1805 ret = sysfs_emit(buf,
1806 "%8llu %8llu %8llu\n",
1807 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1808 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1809 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1810 up_read(&zram->init_lock);
1811
1812 return ret;
1813 }
1814 #endif
1815
1816 static ssize_t debug_stat_show(struct device *dev,
1817 struct device_attribute *attr, char *buf)
1818 {
1819 int version = 1;
1820 struct zram *zram = dev_to_zram(dev);
1821 ssize_t ret;
1822
1823 down_read(&zram->init_lock);
1824 ret = sysfs_emit(buf,
1825 "version: %d\n0 %8llu\n",
1826 version,
1827 (u64)atomic64_read(&zram->stats.miss_free));
1828 up_read(&zram->init_lock);
1829
1830 return ret;
1831 }
1832
1833 static DEVICE_ATTR_RO(io_stat);
1834 static DEVICE_ATTR_RO(mm_stat);
1835 #ifdef CONFIG_ZRAM_WRITEBACK
1836 static DEVICE_ATTR_RO(bd_stat);
1837 #endif
1838 static DEVICE_ATTR_RO(debug_stat);
1839
1840 static void zram_meta_free(struct zram *zram, u64 disksize)
1841 {
1842 size_t num_pages = disksize >> PAGE_SHIFT;
1843 size_t index;
1844
1845 if (!zram->table)
1846 return;
1847
1848 /* Free all pages that are still in this zram device */
1849 for (index = 0; index < num_pages; index++)
1850 zram_free_page(zram, index);
1851
1852 zs_destroy_pool(zram->mem_pool);
1853 vfree(zram->table);
1854 zram->table = NULL;
1855 }
1856
1857 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1858 {
1859 size_t num_pages, index;
1860
1861 num_pages = disksize >> PAGE_SHIFT;
1862 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1863 if (!zram->table)
1864 return false;
1865
1866 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1867 if (!zram->mem_pool) {
1868 vfree(zram->table);
1869 zram->table = NULL;
1870 return false;
1871 }
1872
1873 if (!huge_class_size)
1874 huge_class_size = zs_huge_class_size(zram->mem_pool);
1875
1876 for (index = 0; index < num_pages; index++)
1877 zram_slot_lock_init(zram, index);
1878
1879 return true;
1880 }
1881
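/*
 * Release everything associated with a slot: the backing-device block for
 * ZRAM_WB slots, nothing for ZRAM_SAME slots (no memory was allocated),
 * and the zsmalloc object otherwise. Clears the flags, priority, handle
 * and size.
 */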
1882 static void zram_free_page(struct zram *zram, size_t index)
1883 {
1884 unsigned long handle;
1885
1886 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
1887 zram->table[index].ac_time = 0;
1888 #endif
1889
1890 zram_clear_flag(zram, index, ZRAM_IDLE);
1891 zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
1892 zram_clear_flag(zram, index, ZRAM_PP_SLOT);
1893 zram_set_priority(zram, index, 0);
1894
1895 if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1896 zram_clear_flag(zram, index, ZRAM_HUGE);
1897 atomic64_dec(&zram->stats.huge_pages);
1898 }
1899
1900 if (zram_test_flag(zram, index, ZRAM_WB)) {
1901 zram_clear_flag(zram, index, ZRAM_WB);
1902 zram_release_bdev_block(zram, zram_get_handle(zram, index));
1903 goto out;
1904 }
1905
1906 /*
1907 * No memory is allocated for same-element-filled pages.
1908 * Simply clear the same-page flag.
1909 */
1910 if (zram_test_flag(zram, index, ZRAM_SAME)) {
1911 zram_clear_flag(zram, index, ZRAM_SAME);
1912 atomic64_dec(&zram->stats.same_pages);
1913 goto out;
1914 }
1915
1916 handle = zram_get_handle(zram, index);
1917 if (!handle)
1918 return;
1919
1920 zs_free(zram->mem_pool, handle);
1921
1922 atomic64_sub(zram_get_obj_size(zram, index),
1923 &zram->stats.compr_data_size);
1924 out:
1925 atomic64_dec(&zram->stats.pages_stored);
1926 zram_set_handle(zram, index, 0);
1927 zram_set_obj_size(zram, index, 0);
1928 }
1929
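/*
 * For same-filled slots the "handle" field stores the fill pattern itself,
 * so reading just expands that word across the whole page.
 */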
1930 static int read_same_filled_page(struct zram *zram, struct page *page,
1931 u32 index)
1932 {
1933 void *mem;
1934
1935 mem = kmap_local_page(page);
1936 zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index));
1937 kunmap_local(mem);
1938 return 0;
1939 }
1940
1941 static int read_incompressible_page(struct zram *zram, struct page *page,
1942 u32 index)
1943 {
1944 unsigned long handle;
1945 void *src, *dst;
1946
1947 handle = zram_get_handle(zram, index);
1948 src = zs_obj_read_begin(zram->mem_pool, handle, NULL);
1949 dst = kmap_local_page(page);
1950 copy_page(dst, src);
1951 kunmap_local(dst);
1952 zs_obj_read_end(zram->mem_pool, handle, src);
1953
1954 return 0;
1955 }
1956
1957 static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
1958 {
1959 struct zcomp_strm *zstrm;
1960 unsigned long handle;
1961 unsigned int size;
1962 void *src, *dst;
1963 int ret, prio;
1964
1965 handle = zram_get_handle(zram, index);
1966 size = zram_get_obj_size(zram, index);
1967 prio = zram_get_priority(zram, index);
1968
1969 zstrm = zcomp_stream_get(zram->comps[prio]);
1970 src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
1971 dst = kmap_local_page(page);
1972 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
1973 kunmap_local(dst);
1974 zs_obj_read_end(zram->mem_pool, handle, src);
1975 zcomp_stream_put(zstrm);
1976
1977 return ret;
1978 }
1979
1980 /*
1981 * Reads (decompresses if needed) a page from zspool (zsmalloc).
1982 * Corresponding ZRAM slot should be locked.
1983 */
1984 static int zram_read_from_zspool(struct zram *zram, struct page *page,
1985 u32 index)
1986 {
1987 if (zram_test_flag(zram, index, ZRAM_SAME) ||
1988 !zram_get_handle(zram, index))
1989 return read_same_filled_page(zram, page, index);
1990
1991 if (!zram_test_flag(zram, index, ZRAM_HUGE))
1992 return read_compressed_page(zram, page, index);
1993 else
1994 return read_incompressible_page(zram, page, index);
1995 }
1996
1997 static int zram_read_page(struct zram *zram, struct page *page, u32 index,
1998 struct bio *parent)
1999 {
2000 int ret;
2001
2002 zram_slot_lock(zram, index);
2003 if (!zram_test_flag(zram, index, ZRAM_WB)) {
2004 /* The slot must remain locked throughout this call */
2005 ret = zram_read_from_zspool(zram, page, index);
2006 zram_slot_unlock(zram, index);
2007 } else {
2008 unsigned long blk_idx = zram_get_handle(zram, index);
2009
2010 /*
2011 * The slot should be unlocked before reading from the backing
2012 * device.
2013 */
2014 zram_slot_unlock(zram, index);
2015 ret = read_from_bdev(zram, page, blk_idx, parent);
2016 }
2017
2018 /* Should NEVER happen. Return bio error if it does. */
2019 if (WARN_ON(ret < 0))
2020 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
2021
2022 return ret;
2023 }
2024
2025 /*
2026 * Use a temporary buffer to decompress the page, as the decompressor
2027 * always expects a full page for the output.
2028 */
2029 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
2030 u32 index, int offset)
2031 {
2032 struct page *page = alloc_page(GFP_NOIO);
2033 int ret;
2034
2035 if (!page)
2036 return -ENOMEM;
2037 ret = zram_read_page(zram, page, index, NULL);
2038 if (likely(!ret))
2039 memcpy_to_bvec(bvec, page_address(page) + offset);
2040 __free_page(page);
2041 return ret;
2042 }
2043
2044 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
2045 u32 index, int offset, struct bio *bio)
2046 {
2047 if (is_partial_io(bvec))
2048 return zram_bvec_read_partial(zram, bvec, index, offset);
2049 return zram_read_page(zram, bvec->bv_page, index, bio);
2050 }
2051
2052 static int write_same_filled_page(struct zram *zram, unsigned long fill,
2053 u32 index)
2054 {
2055 zram_slot_lock(zram, index);
2056 zram_free_page(zram, index);
2057 zram_set_flag(zram, index, ZRAM_SAME);
2058 zram_set_handle(zram, index, fill);
2059 zram_slot_unlock(zram, index);
2060
2061 atomic64_inc(&zram->stats.same_pages);
2062 atomic64_inc(&zram->stats.pages_stored);
2063
2064 return 0;
2065 }
2066
2067 static int write_incompressible_page(struct zram *zram, struct page *page,
2068 u32 index)
2069 {
2070 unsigned long handle;
2071 void *src;
2072
2073 /*
2074 * This function is called from a preemptible context, so we don't need
2075 * to do an optimistic allocation with a fallback to pessimistic handle
2076 * allocation, as we do for compressible pages.
2077 */
2078 handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
2079 GFP_NOIO | __GFP_NOWARN |
2080 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2081 if (IS_ERR_VALUE(handle))
2082 return PTR_ERR((void *)handle);
2083
2084 if (!zram_can_store_page(zram)) {
2085 zs_free(zram->mem_pool, handle);
2086 return -ENOMEM;
2087 }
2088
2089 src = kmap_local_page(page);
2090 zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
2091 kunmap_local(src);
2092
2093 zram_slot_lock(zram, index);
2094 zram_free_page(zram, index);
2095 zram_set_flag(zram, index, ZRAM_HUGE);
2096 zram_set_handle(zram, index, handle);
2097 zram_set_obj_size(zram, index, PAGE_SIZE);
2098 zram_slot_unlock(zram, index);
2099
2100 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
2101 atomic64_inc(&zram->stats.huge_pages);
2102 atomic64_inc(&zram->stats.huge_pages_since);
2103 atomic64_inc(&zram->stats.pages_stored);
2104
2105 return 0;
2106 }
2107
2108 static int zram_write_page(struct zram *zram, struct page *page, u32 index)
2109 {
2110 int ret = 0;
2111 unsigned long handle;
2112 unsigned int comp_len;
2113 void *mem;
2114 struct zcomp_strm *zstrm;
2115 unsigned long element;
2116 bool same_filled;
2117
2118 mem = kmap_local_page(page);
2119 same_filled = page_same_filled(mem, &element);
2120 kunmap_local(mem);
2121 if (same_filled)
2122 return write_same_filled_page(zram, element, index);
2123
2124 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
2125 mem = kmap_local_page(page);
2126 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
2127 mem, &comp_len);
2128 kunmap_local(mem);
2129
2130 if (unlikely(ret)) {
2131 zcomp_stream_put(zstrm);
2132 pr_err("Compression failed! err=%d\n", ret);
2133 return ret;
2134 }
2135
2136 if (comp_len >= huge_class_size) {
2137 zcomp_stream_put(zstrm);
2138 return write_incompressible_page(zram, page, index);
2139 }
2140
2141 handle = zs_malloc(zram->mem_pool, comp_len,
2142 GFP_NOIO | __GFP_NOWARN |
2143 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2144 if (IS_ERR_VALUE(handle)) {
2145 zcomp_stream_put(zstrm);
2146 return PTR_ERR((void *)handle);
2147 }
2148
2149 if (!zram_can_store_page(zram)) {
2150 zcomp_stream_put(zstrm);
2151 zs_free(zram->mem_pool, handle);
2152 return -ENOMEM;
2153 }
2154
2155 zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
2156 zcomp_stream_put(zstrm);
2157
2158 zram_slot_lock(zram, index);
2159 zram_free_page(zram, index);
2160 zram_set_handle(zram, index, handle);
2161 zram_set_obj_size(zram, index, comp_len);
2162 zram_slot_unlock(zram, index);
2163
2164 /* Update stats */
2165 atomic64_inc(&zram->stats.pages_stored);
2166 atomic64_add(comp_len, &zram->stats.compr_data_size);
2167
2168 return ret;
2169 }
2170
2171 /*
2172 * This is a partial IO. Read the full page before writing the changes.
2173 */
2174 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
2175 u32 index, int offset, struct bio *bio)
2176 {
2177 struct page *page = alloc_page(GFP_NOIO);
2178 int ret;
2179
2180 if (!page)
2181 return -ENOMEM;
2182
2183 ret = zram_read_page(zram, page, index, bio);
2184 if (!ret) {
2185 memcpy_from_bvec(page_address(page) + offset, bvec);
2186 ret = zram_write_page(zram, page, index);
2187 }
2188 __free_page(page);
2189 return ret;
2190 }
2191
2192 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
2193 u32 index, int offset, struct bio *bio)
2194 {
2195 if (is_partial_io(bvec))
2196 return zram_bvec_write_partial(zram, bvec, index, offset, bio);
2197 return zram_write_page(zram, bvec->bv_page, index);
2198 }
2199
2200 #ifdef CONFIG_ZRAM_MULTI_COMP
2201 #define RECOMPRESS_IDLE (1 << 0)
2202 #define RECOMPRESS_HUGE (1 << 1)
2203
2204 static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max,
2205 struct zram_pp_ctl *ctl)
2206 {
2207 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
2208 unsigned long index;
2209
2210 for (index = 0; index < nr_pages; index++) {
2211 bool ok = true;
2212
2213 zram_slot_lock(zram, index);
2214 if (!zram_allocated(zram, index))
2215 goto next;
2216
2217 if (mode & RECOMPRESS_IDLE &&
2218 !zram_test_flag(zram, index, ZRAM_IDLE))
2219 goto next;
2220
2221 if (mode & RECOMPRESS_HUGE &&
2222 !zram_test_flag(zram, index, ZRAM_HUGE))
2223 goto next;
2224
2225 if (zram_test_flag(zram, index, ZRAM_WB) ||
2226 zram_test_flag(zram, index, ZRAM_SAME) ||
2227 zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
2228 goto next;
2229
2230 /* Already compressed with the same or a higher priority */
2231 if (zram_get_priority(zram, index) + 1 >= prio_max)
2232 goto next;
2233
2234 ok = place_pp_slot(zram, ctl, index);
2235 next:
2236 zram_slot_unlock(zram, index);
2237 if (!ok)
2238 break;
2239 }
2240
2241 return 0;
2242 }
2243
2244 /*
2245 * This function decompresses the page (unless it is ZRAM_HUGE) and then
2246 * attempts to recompress it using the compression algorithm of the provided
2247 * priority (which is potentially more effective).
2248 *
2249 * Corresponding ZRAM slot should be locked.
2250 */
2251 static int recompress_slot(struct zram *zram, u32 index, struct page *page,
2252 u64 *num_recomp_pages, u32 threshold, u32 prio,
2253 u32 prio_max)
2254 {
2255 struct zcomp_strm *zstrm = NULL;
2256 unsigned long handle_old;
2257 unsigned long handle_new;
2258 unsigned int comp_len_old;
2259 unsigned int comp_len_new;
2260 unsigned int class_index_old;
2261 unsigned int class_index_new;
2262 void *src;
2263 int ret = 0;
2264
2265 handle_old = zram_get_handle(zram, index);
2266 if (!handle_old)
2267 return -EINVAL;
2268
2269 comp_len_old = zram_get_obj_size(zram, index);
2270 /*
2271 * Do not recompress objects that are already "small enough".
2272 */
2273 if (comp_len_old < threshold)
2274 return 0;
2275
2276 ret = zram_read_from_zspool(zram, page, index);
2277 if (ret)
2278 return ret;
2279
2280 /*
2281 * We touched this entry, so mark it as non-IDLE. This ensures that we
2282 * don't preserve the IDLE flag and don't incorrectly pick this entry
2283 * for a different post-processing type (e.g. writeback).
2284 */
2285 zram_clear_flag(zram, index, ZRAM_IDLE);
2286
2287 class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
2288
2289 prio = max(prio, zram_get_priority(zram, index) + 1);
2290 /*
2291 * The recompression slot scan should not select slots that are
2292 * already compressed with a higher-priority algorithm, but check
2293 * again just in case.
2294 */
2295 if (prio >= prio_max)
2296 return 0;
2297
2298 /*
2299 * Iterate the secondary comp algorithms list (in order of priority)
2300 * and try to recompress the page.
2301 */
2302 for (; prio < prio_max; prio++) {
2303 if (!zram->comps[prio])
2304 continue;
2305
2306 zstrm = zcomp_stream_get(zram->comps[prio]);
2307 src = kmap_local_page(page);
2308 ret = zcomp_compress(zram->comps[prio], zstrm,
2309 src, &comp_len_new);
2310 kunmap_local(src);
2311
2312 if (ret) {
2313 zcomp_stream_put(zstrm);
2314 zstrm = NULL;
2315 break;
2316 }
2317
2318 class_index_new = zs_lookup_class_index(zram->mem_pool,
2319 comp_len_new);
2320
2321 /* Continue until we make progress */
2322 if (class_index_new >= class_index_old ||
2323 (threshold && comp_len_new >= threshold)) {
2324 zcomp_stream_put(zstrm);
2325 zstrm = NULL;
2326 continue;
2327 }
2328
2329 /* Recompression was successful so break out */
2330 break;
2331 }
2332
2333 /*
2334 * Decrement the limit (if set) on pages we can recompress, even
2335 * when current recompression was unsuccessful or did not compress
2336 * the page below the threshold, because we still spent resources
2337 * on it.
2338 */
2339 if (*num_recomp_pages)
2340 *num_recomp_pages -= 1;
2341
2342 /* Compression error */
2343 if (ret)
2344 return ret;
2345
2346 if (!zstrm) {
2347 /*
2348 * Secondary algorithms failed to re-compress the page
2349 * in a way that would save memory.
2350 *
2351 * Mark the object incompressible if the max-priority
2352 * algorithm couldn't re-compress it.
2353 */
2354 if (prio < zram->num_active_comps)
2355 return 0;
2356 zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
2357 return 0;
2358 }
2359
2360 /*
2361 * We are holding the per-CPU stream mutex and the entry lock, so we had
2362 * better avoid direct reclaim. An allocation error is not fatal since
2363 * we still have the old object in the mem_pool.
2364 *
2365 * XXX: technically, the node we really want here is the node that holds
2366 * the original compressed data. But that would require us to modify
2367 * zsmalloc API to return this information. For now, we will make do with
2368 * the node of the page allocated for recompression.
2369 */
2370 handle_new = zs_malloc(zram->mem_pool, comp_len_new,
2371 GFP_NOIO | __GFP_NOWARN |
2372 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2373 if (IS_ERR_VALUE(handle_new)) {
2374 zcomp_stream_put(zstrm);
2375 return PTR_ERR((void *)handle_new);
2376 }
2377
2378 zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
2379 zcomp_stream_put(zstrm);
2380
2381 zram_free_page(zram, index);
2382 zram_set_handle(zram, index, handle_new);
2383 zram_set_obj_size(zram, index, comp_len_new);
2384 zram_set_priority(zram, index, prio);
2385
2386 atomic64_add(comp_len_new, &zram->stats.compr_data_size);
2387 atomic64_inc(&zram->stats.pages_stored);
2388
2389 return 0;
2390 }
2391
2392 static ssize_t recompress_store(struct device *dev,
2393 struct device_attribute *attr,
2394 const char *buf, size_t len)
2395 {
2396 struct zram *zram = dev_to_zram(dev);
2397 char *args, *param, *val, *algo = NULL;
2398 u64 num_recomp_pages = ULLONG_MAX;
2399 struct zram_pp_ctl *ctl = NULL;
2400 struct zram_pp_slot *pps;
2401 u32 mode = 0, threshold = 0;
2402 u32 prio, prio_max;
2403 struct page *page = NULL;
2404 ssize_t ret;
2405
2406 prio = ZRAM_SECONDARY_COMP;
2407 prio_max = zram->num_active_comps;
2408
2409 args = skip_spaces(buf);
2410 while (*args) {
2411 args = next_arg(args, &param, &val);
2412
2413 if (!val || !*val)
2414 return -EINVAL;
2415
2416 if (!strcmp(param, "type")) {
2417 if (!strcmp(val, "idle"))
2418 mode = RECOMPRESS_IDLE;
2419 if (!strcmp(val, "huge"))
2420 mode = RECOMPRESS_HUGE;
2421 if (!strcmp(val, "huge_idle"))
2422 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
2423 continue;
2424 }
2425
2426 if (!strcmp(param, "max_pages")) {
2427 /*
2428 * Limit the number of entries (pages) we attempt to
2429 * recompress.
2430 */
2431 ret = kstrtoull(val, 10, &num_recomp_pages);
2432 if (ret)
2433 return ret;
2434 continue;
2435 }
2436
2437 if (!strcmp(param, "threshold")) {
2438 /*
2439 * We will only re-compress objects whose size is equal to or
2440 * greater than this watermark.
2441 */
2442 ret = kstrtouint(val, 10, &threshold);
2443 if (ret)
2444 return ret;
2445 continue;
2446 }
2447
2448 if (!strcmp(param, "algo")) {
2449 algo = val;
2450 continue;
2451 }
2452
2453 if (!strcmp(param, "priority")) {
2454 ret = kstrtouint(val, 10, &prio);
2455 if (ret)
2456 return ret;
2457
2458 if (prio == ZRAM_PRIMARY_COMP)
2459 prio = ZRAM_SECONDARY_COMP;
2460
2461 prio_max = prio + 1;
2462 continue;
2463 }
2464 }
2465
2466 if (threshold >= huge_class_size)
2467 return -EINVAL;
2468
2469 down_read(&zram->init_lock);
2470 if (!init_done(zram)) {
2471 ret = -EINVAL;
2472 goto release_init_lock;
2473 }
2474
2475 /* Do not permit concurrent post-processing actions. */
2476 if (atomic_xchg(&zram->pp_in_progress, 1)) {
2477 up_read(&zram->init_lock);
2478 return -EAGAIN;
2479 }
2480
2481 if (algo) {
2482 bool found = false;
2483
2484 for (; prio < ZRAM_MAX_COMPS; prio++) {
2485 if (!zram->comp_algs[prio])
2486 continue;
2487
2488 if (!strcmp(zram->comp_algs[prio], algo)) {
2489 prio_max = prio + 1;
2490 found = true;
2491 break;
2492 }
2493 }
2494
2495 if (!found) {
2496 ret = -EINVAL;
2497 goto release_init_lock;
2498 }
2499 }
2500
2501 prio_max = min(prio_max, (u32)zram->num_active_comps);
2502 if (prio >= prio_max) {
2503 ret = -EINVAL;
2504 goto release_init_lock;
2505 }
2506
2507 page = alloc_page(GFP_KERNEL);
2508 if (!page) {
2509 ret = -ENOMEM;
2510 goto release_init_lock;
2511 }
2512
2513 ctl = init_pp_ctl();
2514 if (!ctl) {
2515 ret = -ENOMEM;
2516 goto release_init_lock;
2517 }
2518
2519 scan_slots_for_recompress(zram, mode, prio_max, ctl);
2520
2521 ret = len;
2522 while ((pps = select_pp_slot(ctl))) {
2523 int err = 0;
2524
2525 if (!num_recomp_pages)
2526 break;
2527
2528 zram_slot_lock(zram, pps->index);
2529 if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
2530 goto next;
2531
2532 err = recompress_slot(zram, pps->index, page,
2533 &num_recomp_pages, threshold,
2534 prio, prio_max);
2535 next:
2536 zram_slot_unlock(zram, pps->index);
2537 release_pp_slot(zram, pps);
2538
2539 if (err) {
2540 ret = err;
2541 break;
2542 }
2543
2544 cond_resched();
2545 }
2546
2547 release_init_lock:
2548 if (page)
2549 __free_page(page);
2550 release_pp_ctl(zram, ctl);
2551 atomic_set(&zram->pp_in_progress, 0);
2552 up_read(&zram->init_lock);
2553 return ret;
2554 }
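
/*
 * Illustrative usage sketch (not part of the driver): assuming a device
 * named zram0 with zstd registered as a secondary algorithm via the
 * recomp_algorithm attribute, recompression could be triggered like so:
 *
 *	echo "algo=zstd priority=1" > /sys/block/zram0/recomp_algorithm
 *	echo "type=huge_idle threshold=2000 max_pages=1000 algo=zstd" \
 *		> /sys/block/zram0/recompress
 *
 * The threshold and max_pages values above are arbitrary examples. The
 * parser accepts the key=value parameters in any order, ignores unknown
 * keys and returns -EINVAL when a value is missing.
 */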
2555 #endif
2556
2557 static void zram_bio_discard(struct zram *zram, struct bio *bio)
2558 {
2559 size_t n = bio->bi_iter.bi_size;
2560 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2561 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2562 SECTOR_SHIFT;
2563
2564 /*
2565 * zram manages data in physical block size units. Because the logical
2566 * block size is not identical to the physical block size on some
2567 * architectures, we can get a discard request that points to a specific
2568 * offset within a physical block. Although we could handle such a
2569 * request by reading the physical block, decompressing it, partially
2570 * zeroing it and then re-compressing and re-storing it, that is not
2571 * worthwhile: the whole point of a discard request is to save memory.
2572 * So simply skip the partial logical blocks here.
2573 */
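/*
 * Worked example (illustrative, assuming PAGE_SIZE == 4096 and 512-byte
 * sectors): a 4096-byte discard starting at sector 1 begins at offset 512
 * within page 0. The partial head (3584 bytes) is skipped, leaving
 * n == 512 for page 1, which is less than PAGE_SIZE, so no slot is freed
 * at all. Only pages fully covered by the request are freed below.
 */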
2574 if (offset) {
2575 if (n <= (PAGE_SIZE - offset))
2576 return;
2577
2578 n -= (PAGE_SIZE - offset);
2579 index++;
2580 }
2581
2582 while (n >= PAGE_SIZE) {
2583 zram_slot_lock(zram, index);
2584 zram_free_page(zram, index);
2585 zram_slot_unlock(zram, index);
2586 atomic64_inc(&zram->stats.notify_free);
2587 index++;
2588 n -= PAGE_SIZE;
2589 }
2590
2591 bio_endio(bio);
2592 }
2593
2594 static void zram_bio_read(struct zram *zram, struct bio *bio)
2595 {
2596 unsigned long start_time = bio_start_io_acct(bio);
2597 struct bvec_iter iter = bio->bi_iter;
2598
2599 do {
2600 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2601 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2602 SECTOR_SHIFT;
2603 struct bio_vec bv = bio_iter_iovec(bio, iter);
2604
2605 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2606
2607 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
2608 atomic64_inc(&zram->stats.failed_reads);
2609 bio->bi_status = BLK_STS_IOERR;
2610 break;
2611 }
2612 flush_dcache_page(bv.bv_page);
2613
2614 zram_slot_lock(zram, index);
2615 zram_accessed(zram, index);
2616 zram_slot_unlock(zram, index);
2617
2618 bio_advance_iter_single(bio, &iter, bv.bv_len);
2619 } while (iter.bi_size);
2620
2621 bio_end_io_acct(bio, start_time);
2622 bio_endio(bio);
2623 }
2624
2625 static void zram_bio_write(struct zram *zram, struct bio *bio)
2626 {
2627 unsigned long start_time = bio_start_io_acct(bio);
2628 struct bvec_iter iter = bio->bi_iter;
2629
2630 do {
2631 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2632 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2633 SECTOR_SHIFT;
2634 struct bio_vec bv = bio_iter_iovec(bio, iter);
2635
2636 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2637
2638 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
2639 atomic64_inc(&zram->stats.failed_writes);
2640 bio->bi_status = BLK_STS_IOERR;
2641 break;
2642 }
2643
2644 zram_slot_lock(zram, index);
2645 zram_accessed(zram, index);
2646 zram_slot_unlock(zram, index);
2647
2648 bio_advance_iter_single(bio, &iter, bv.bv_len);
2649 } while (iter.bi_size);
2650
2651 bio_end_io_acct(bio, start_time);
2652 bio_endio(bio);
2653 }
2654
2655 /*
2656 * Handler function for all zram I/O requests.
2657 */
2658 static void zram_submit_bio(struct bio *bio)
2659 {
2660 struct zram *zram = bio->bi_bdev->bd_disk->private_data;
2661
2662 switch (bio_op(bio)) {
2663 case REQ_OP_READ:
2664 zram_bio_read(zram, bio);
2665 break;
2666 case REQ_OP_WRITE:
2667 zram_bio_write(zram, bio);
2668 break;
2669 case REQ_OP_DISCARD:
2670 case REQ_OP_WRITE_ZEROES:
2671 zram_bio_discard(zram, bio);
2672 break;
2673 default:
2674 WARN_ON_ONCE(1);
2675 bio_endio(bio);
2676 }
2677 }
2678
2679 static void zram_slot_free_notify(struct block_device *bdev,
2680 unsigned long index)
2681 {
2682 struct zram *zram;
2683
2684 zram = bdev->bd_disk->private_data;
2685
2686 atomic64_inc(&zram->stats.notify_free);
2687 if (!zram_slot_trylock(zram, index)) {
2688 atomic64_inc(&zram->stats.miss_free);
2689 return;
2690 }
2691
2692 zram_free_page(zram, index);
2693 zram_slot_unlock(zram, index);
2694 }
2695
2696 static void zram_comp_params_reset(struct zram *zram)
2697 {
2698 u32 prio;
2699
2700 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2701 comp_params_reset(zram, prio);
2702 }
2703 }
2704
2705 static void zram_destroy_comps(struct zram *zram)
2706 {
2707 u32 prio;
2708
2709 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2710 struct zcomp *comp = zram->comps[prio];
2711
2712 zram->comps[prio] = NULL;
2713 if (!comp)
2714 continue;
2715 zcomp_destroy(comp);
2716 zram->num_active_comps--;
2717 }
2718
2719 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2720 /* Do not free statically defined compression algorithms */
2721 if (zram->comp_algs[prio] != default_compressor)
2722 kfree(zram->comp_algs[prio]);
2723 zram->comp_algs[prio] = NULL;
2724 }
2725
2726 zram_comp_params_reset(zram);
2727 }
2728
2729 static void zram_reset_device(struct zram *zram)
2730 {
2731 down_write(&zram->init_lock);
2732
2733 zram->limit_pages = 0;
2734
2735 set_capacity_and_notify(zram->disk, 0);
2736 part_stat_set_all(zram->disk->part0, 0);
2737
2738 /* All in-flight I/O on all CPUs has completed, so it is safe to free */
2739 zram_meta_free(zram, zram->disksize);
2740 zram->disksize = 0;
2741 zram_destroy_comps(zram);
2742 memset(&zram->stats, 0, sizeof(zram->stats));
2743 atomic_set(&zram->pp_in_progress, 0);
2744 reset_bdev(zram);
2745
2746 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2747 up_write(&zram->init_lock);
2748 }
2749
2750 static ssize_t disksize_store(struct device *dev,
2751 struct device_attribute *attr, const char *buf, size_t len)
2752 {
2753 u64 disksize;
2754 struct zcomp *comp;
2755 struct zram *zram = dev_to_zram(dev);
2756 int err;
2757 u32 prio;
2758
2759 disksize = memparse(buf, NULL);
2760 if (!disksize)
2761 return -EINVAL;
2762
2763 down_write(&zram->init_lock);
2764 if (init_done(zram)) {
2765 pr_info("Cannot change disksize for initialized device\n");
2766 err = -EBUSY;
2767 goto out_unlock;
2768 }
2769
2770 disksize = PAGE_ALIGN(disksize);
2771 if (!zram_meta_alloc(zram, disksize)) {
2772 err = -ENOMEM;
2773 goto out_unlock;
2774 }
2775
2776 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2777 if (!zram->comp_algs[prio])
2778 continue;
2779
2780 comp = zcomp_create(zram->comp_algs[prio],
2781 &zram->params[prio]);
2782 if (IS_ERR(comp)) {
2783 pr_err("Cannot initialise %s compressing backend\n",
2784 zram->comp_algs[prio]);
2785 err = PTR_ERR(comp);
2786 goto out_free_comps;
2787 }
2788
2789 zram->comps[prio] = comp;
2790 zram->num_active_comps++;
2791 }
2792 zram->disksize = disksize;
2793 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
2794 up_write(&zram->init_lock);
2795
2796 return len;
2797
2798 out_free_comps:
2799 zram_destroy_comps(zram);
2800 zram_meta_free(zram, disksize);
2801 out_unlock:
2802 up_write(&zram->init_lock);
2803 return err;
2804 }
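
/*
 * Illustrative usage sketch (not part of the driver), assuming an unused
 * zram0 device: the compression algorithm is usually selected first, then
 * the disk size is set, e.g.
 *
 *	echo zstd > /sys/block/zram0/comp_algorithm
 *	echo 1G > /sys/block/zram0/disksize
 *	mkfs.ext4 /dev/zram0
 *
 * memparse() accepts suffixes such as K, M and G; the value is rounded up
 * to a multiple of PAGE_SIZE by PAGE_ALIGN() above.
 */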
2805
2806 static ssize_t reset_store(struct device *dev,
2807 struct device_attribute *attr, const char *buf, size_t len)
2808 {
2809 int ret;
2810 unsigned short do_reset;
2811 struct zram *zram;
2812 struct gendisk *disk;
2813
2814 ret = kstrtou16(buf, 10, &do_reset);
2815 if (ret)
2816 return ret;
2817
2818 if (!do_reset)
2819 return -EINVAL;
2820
2821 zram = dev_to_zram(dev);
2822 disk = zram->disk;
2823
2824 mutex_lock(&disk->open_mutex);
2825 /* Do not reset an active or claimed device */
2826 if (disk_openers(disk) || zram->claim) {
2827 mutex_unlock(&disk->open_mutex);
2828 return -EBUSY;
2829 }
2830
2831 /* From now on, no one can open /dev/zram[0-9] */
2832 zram->claim = true;
2833 mutex_unlock(&disk->open_mutex);
2834
2835 /* Make sure all pending I/O has finished */
2836 sync_blockdev(disk->part0);
2837 zram_reset_device(zram);
2838
2839 mutex_lock(&disk->open_mutex);
2840 zram->claim = false;
2841 mutex_unlock(&disk->open_mutex);
2842
2843 return len;
2844 }
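
/*
 * Illustrative usage sketch (not part of the driver): writing any non-zero
 * value resets the device, which only succeeds while nothing holds it open,
 * e.g.
 *
 *	swapoff /dev/zram0	(or umount, if used as a block device)
 *	echo 1 > /sys/block/zram0/reset
 *
 * After the reset the device is back in the uninitialized state and its
 * disksize can be set again.
 */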
2845
2846 static int zram_open(struct gendisk *disk, blk_mode_t mode)
2847 {
2848 struct zram *zram = disk->private_data;
2849
2850 WARN_ON(!mutex_is_locked(&disk->open_mutex));
2851
2852 /* zram has been claimed for reset, so the open request fails */
2853 if (zram->claim)
2854 return -EBUSY;
2855 return 0;
2856 }
2857
2858 static const struct block_device_operations zram_devops = {
2859 .open = zram_open,
2860 .submit_bio = zram_submit_bio,
2861 .swap_slot_free_notify = zram_slot_free_notify,
2862 .owner = THIS_MODULE
2863 };
2864
2865 static DEVICE_ATTR_WO(compact);
2866 static DEVICE_ATTR_RW(disksize);
2867 static DEVICE_ATTR_RO(initstate);
2868 static DEVICE_ATTR_WO(reset);
2869 static DEVICE_ATTR_WO(mem_limit);
2870 static DEVICE_ATTR_WO(mem_used_max);
2871 static DEVICE_ATTR_WO(idle);
2872 static DEVICE_ATTR_RW(comp_algorithm);
2873 #ifdef CONFIG_ZRAM_WRITEBACK
2874 static DEVICE_ATTR_RW(backing_dev);
2875 static DEVICE_ATTR_WO(writeback);
2876 static DEVICE_ATTR_RW(writeback_limit);
2877 static DEVICE_ATTR_RW(writeback_limit_enable);
2878 static DEVICE_ATTR_RW(writeback_batch_size);
2879 #endif
2880 #ifdef CONFIG_ZRAM_MULTI_COMP
2881 static DEVICE_ATTR_RW(recomp_algorithm);
2882 static DEVICE_ATTR_WO(recompress);
2883 #endif
2884 static DEVICE_ATTR_WO(algorithm_params);
2885
2886 static struct attribute *zram_disk_attrs[] = {
2887 &dev_attr_disksize.attr,
2888 &dev_attr_initstate.attr,
2889 &dev_attr_reset.attr,
2890 &dev_attr_compact.attr,
2891 &dev_attr_mem_limit.attr,
2892 &dev_attr_mem_used_max.attr,
2893 &dev_attr_idle.attr,
2894 &dev_attr_comp_algorithm.attr,
2895 #ifdef CONFIG_ZRAM_WRITEBACK
2896 &dev_attr_backing_dev.attr,
2897 &dev_attr_writeback.attr,
2898 &dev_attr_writeback_limit.attr,
2899 &dev_attr_writeback_limit_enable.attr,
2900 &dev_attr_writeback_batch_size.attr,
2901 #endif
2902 &dev_attr_io_stat.attr,
2903 &dev_attr_mm_stat.attr,
2904 #ifdef CONFIG_ZRAM_WRITEBACK
2905 &dev_attr_bd_stat.attr,
2906 #endif
2907 &dev_attr_debug_stat.attr,
2908 #ifdef CONFIG_ZRAM_MULTI_COMP
2909 &dev_attr_recomp_algorithm.attr,
2910 &dev_attr_recompress.attr,
2911 #endif
2912 &dev_attr_algorithm_params.attr,
2913 NULL,
2914 };
2915
2916 ATTRIBUTE_GROUPS(zram_disk);
2917
2918 /*
2919 * Allocate and initialize a new zram device. The function returns
2920 * a device_id ('>= 0') upon success, and a negative value otherwise.
2921 */
2922 static int zram_add(void)
2923 {
2924 struct queue_limits lim = {
2925 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
2926 /*
2927 * Ensure that we always get PAGE_SIZE-aligned and
2928 * n * PAGE_SIZE-sized I/O requests.
2929 */
2930 .physical_block_size = PAGE_SIZE,
2931 .io_min = PAGE_SIZE,
2932 .io_opt = PAGE_SIZE,
2933 .max_hw_discard_sectors = UINT_MAX,
2934 /*
2935 * zram_bio_discard() clears all logical blocks if the logical block
2936 * size is identical to the physical block size (PAGE_SIZE). But if
2937 * they differ, we skip the parts of the request range that are not
2938 * aligned to the physical block size, so we cannot guarantee that
2939 * all discarded logical blocks are zeroed. Only advertise write
2940 * zeroes support when the two block sizes match.
2941 */
2942 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
2943 .max_write_zeroes_sectors = UINT_MAX,
2944 #endif
2945 .features = BLK_FEAT_STABLE_WRITES |
2946 BLK_FEAT_SYNCHRONOUS,
2947 };
2948 struct zram *zram;
2949 int ret, device_id;
2950
2951 zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
2952 if (!zram)
2953 return -ENOMEM;
2954
2955 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
2956 if (ret < 0)
2957 goto out_free_dev;
2958 device_id = ret;
2959
2960 init_rwsem(&zram->init_lock);
2961 #ifdef CONFIG_ZRAM_WRITEBACK
2962 zram->wb_batch_size = 32;
2963 #endif
2964
2965 /* gendisk structure */
2966 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
2967 if (IS_ERR(zram->disk)) {
2968 pr_err("Error allocating disk structure for device %d\n",
2969 device_id);
2970 ret = PTR_ERR(zram->disk);
2971 goto out_free_idr;
2972 }
2973
2974 zram->disk->major = zram_major;
2975 zram->disk->first_minor = device_id;
2976 zram->disk->minors = 1;
2977 zram->disk->flags |= GENHD_FL_NO_PART;
2978 zram->disk->fops = &zram_devops;
2979 zram->disk->private_data = zram;
2980 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
2981 atomic_set(&zram->pp_in_progress, 0);
2982 zram_comp_params_reset(zram);
2983 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2984
2985 /* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
2986 set_capacity(zram->disk, 0);
2987 ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
2988 if (ret)
2989 goto out_cleanup_disk;
2990
2991 zram_debugfs_register(zram);
2992 pr_info("Added device: %s\n", zram->disk->disk_name);
2993 return device_id;
2994
2995 out_cleanup_disk:
2996 put_disk(zram->disk);
2997 out_free_idr:
2998 idr_remove(&zram_index_idr, device_id);
2999 out_free_dev:
3000 kfree(zram);
3001 return ret;
3002 }
3003
3004 static int zram_remove(struct zram *zram)
3005 {
3006 bool claimed;
3007
3008 mutex_lock(&zram->disk->open_mutex);
3009 if (disk_openers(zram->disk)) {
3010 mutex_unlock(&zram->disk->open_mutex);
3011 return -EBUSY;
3012 }
3013
3014 claimed = zram->claim;
3015 if (!claimed)
3016 zram->claim = true;
3017 mutex_unlock(&zram->disk->open_mutex);
3018
3019 zram_debugfs_unregister(zram);
3020
3021 if (claimed) {
3022 /*
3023 * If we were claimed by reset_store(), del_gendisk() will
3024 * wait until reset_store() is done, so there is nothing to do here.
3025 */
3026 ;
3027 } else {
3028 /* Make sure all pending I/O has finished */
3029 sync_blockdev(zram->disk->part0);
3030 zram_reset_device(zram);
3031 }
3032
3033 pr_info("Removed device: %s\n", zram->disk->disk_name);
3034
3035 del_gendisk(zram->disk);
3036
3037 /* del_gendisk drains pending reset_store */
3038 WARN_ON_ONCE(claimed && zram->claim);
3039
3040 /*
3041 * disksize_store() may be called in between zram_reset_device()
3042 * and del_gendisk(), so run a final reset to avoid leaking
3043 * anything allocated by disksize_store().
3044 */
3045 zram_reset_device(zram);
3046
3047 put_disk(zram->disk);
3048 kfree(zram);
3049 return 0;
3050 }
3051
3052 /* zram-control sysfs attributes */
3053
3054 /*
3055 * NOTE: hot_add is not a usual read-only sysfs attribute, in the
3056 * sense that reading from this file does alter the state of the
3057 * system: it creates a new uninitialized zram device and returns that
3058 * device's device_id (or an error code if it fails to create a new device).
3059 */
3060 static ssize_t hot_add_show(const struct class *class,
3061 const struct class_attribute *attr,
3062 char *buf)
3063 {
3064 int ret;
3065
3066 mutex_lock(&zram_index_mutex);
3067 ret = zram_add();
3068 mutex_unlock(&zram_index_mutex);
3069
3070 if (ret < 0)
3071 return ret;
3072 return sysfs_emit(buf, "%d\n", ret);
3073 }
3074 /* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
3075 static struct class_attribute class_attr_hot_add =
3076 __ATTR(hot_add, 0400, hot_add_show, NULL);
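
/*
 * Illustrative usage sketch (not part of the driver): reading hot_add
 * creates a device and prints its id, e.g.
 *
 *	cat /sys/class/zram-control/hot_add
 *	2
 *
 * creates /dev/zram2 (the id shown is just an example); the new device
 * stays uninitialized until its disksize is set.
 */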
3077
3078 static ssize_t hot_remove_store(const struct class *class,
3079 const struct class_attribute *attr,
3080 const char *buf,
3081 size_t count)
3082 {
3083 struct zram *zram;
3084 int ret, dev_id;
3085
3086 /* dev_id is gendisk->first_minor, which is `int' */
3087 ret = kstrtoint(buf, 10, &dev_id);
3088 if (ret)
3089 return ret;
3090 if (dev_id < 0)
3091 return -EINVAL;
3092
3093 mutex_lock(&zram_index_mutex);
3094
3095 zram = idr_find(&zram_index_idr, dev_id);
3096 if (zram) {
3097 ret = zram_remove(zram);
3098 if (!ret)
3099 idr_remove(&zram_index_idr, dev_id);
3100 } else {
3101 ret = -ENODEV;
3102 }
3103
3104 mutex_unlock(&zram_index_mutex);
3105 return ret ? ret : count;
3106 }
3107 static CLASS_ATTR_WO(hot_remove);
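
/*
 * Illustrative usage sketch (not part of the driver): writing a device id
 * removes that device if it is not in use, e.g.
 *
 *	echo 2 > /sys/class/zram-control/hot_remove
 *
 * This fails with -EBUSY while the device is open and with -ENODEV if no
 * such device exists.
 */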
3108
3109 static struct attribute *zram_control_class_attrs[] = {
3110 &class_attr_hot_add.attr,
3111 &class_attr_hot_remove.attr,
3112 NULL,
3113 };
3114 ATTRIBUTE_GROUPS(zram_control_class);
3115
3116 static struct class zram_control_class = {
3117 .name = "zram-control",
3118 .class_groups = zram_control_class_groups,
3119 };
3120
3121 static int zram_remove_cb(int id, void *ptr, void *data)
3122 {
3123 WARN_ON_ONCE(zram_remove(ptr));
3124 return 0;
3125 }
3126
3127 static void destroy_devices(void)
3128 {
3129 class_unregister(&zram_control_class);
3130 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
3131 zram_debugfs_destroy();
3132 idr_destroy(&zram_index_idr);
3133 unregister_blkdev(zram_major, "zram");
3134 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3135 }
3136
3137 static int __init zram_init(void)
3138 {
3139 struct zram_table_entry zram_te;
3140 int ret;
3141
3142 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);
3143
3144 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
3145 zcomp_cpu_up_prepare, zcomp_cpu_dead);
3146 if (ret < 0)
3147 return ret;
3148
3149 ret = class_register(&zram_control_class);
3150 if (ret) {
3151 pr_err("Unable to register zram-control class\n");
3152 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3153 return ret;
3154 }
3155
3156 zram_debugfs_create();
3157 zram_major = register_blkdev(0, "zram");
3158 if (zram_major <= 0) {
3159 pr_err("Unable to get major number\n");
3160 class_unregister(&zram_control_class);
3161 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3162 return -EBUSY;
3163 }
3164
3165 while (num_devices != 0) {
3166 mutex_lock(&zram_index_mutex);
3167 ret = zram_add();
3168 mutex_unlock(&zram_index_mutex);
3169 if (ret < 0)
3170 goto out_error;
3171 num_devices--;
3172 }
3173
3174 return 0;
3175
3176 out_error:
3177 destroy_devices();
3178 return ret;
3179 }
3180
3181 static void __exit zram_exit(void)
3182 {
3183 destroy_devices();
3184 }
3185
3186 module_init(zram_init);
3187 module_exit(zram_exit);
3188
3189 module_param(num_devices, uint, 0);
3190 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
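
/*
 * Illustrative usage sketch (not part of the driver): the module parameter
 * controls how many devices are pre-created at load time, e.g.
 *
 *	modprobe zram num_devices=4
 *
 * creates /dev/zram0 .. /dev/zram3; more devices can still be added at
 * runtime through /sys/class/zram-control/hot_add.
 */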
3191
3192 MODULE_LICENSE("Dual BSD/GPL");
3193 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
3194 MODULE_DESCRIPTION("Compressed RAM Block Device");
3195