1 /*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
15 #define pr_fmt(fmt) "zram: " fmt
16
17 #include <linux/module.h>
18 #include <linux/kernel.h>
19 #include <linux/bio.h>
20 #include <linux/bitops.h>
21 #include <linux/blkdev.h>
22 #include <linux/buffer_head.h>
23 #include <linux/device.h>
24 #include <linux/highmem.h>
25 #include <linux/slab.h>
26 #include <linux/backing-dev.h>
27 #include <linux/string.h>
28 #include <linux/vmalloc.h>
29 #include <linux/err.h>
30 #include <linux/idr.h>
31 #include <linux/sysfs.h>
32 #include <linux/debugfs.h>
33 #include <linux/cpuhotplug.h>
34 #include <linux/part_stat.h>
35 #include <linux/kernel_read_file.h>
36
37 #include "zram_drv.h"
38
39 static DEFINE_IDR(zram_index_idr);
40 /* idr index must be protected */
41 static DEFINE_MUTEX(zram_index_mutex);
42
43 static int zram_major;
44 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
45
46 #define ZRAM_MAX_ALGO_NAME_SZ 128
47
48 /* Module params (documentation at end) */
49 static unsigned int num_devices = 1;
50 /*
51  * Pages that compress to sizes equal to or greater than this are stored
52 * uncompressed in memory.
53 */
54 static size_t huge_class_size;
55
56 static const struct block_device_operations zram_devops;
57
58 static void slot_free(struct zram *zram, u32 index);
59 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)
60
61 static void slot_lock_init(struct zram *zram, u32 index)
62 {
63 static struct lock_class_key __key;
64
65 lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
66 &__key, 0);
67 }
68
69 /*
70 * entry locking rules:
71 *
72 * 1) Lock is exclusive
73 *
74 * 2) lock() function can sleep waiting for the lock
75 *
76 * 3) Lock owner can sleep
77 *
78 * 4) Use TRY lock variant when in atomic context
79  *    - must check return value and handle locking failures
80 */
81 static __must_check bool slot_trylock(struct zram *zram, u32 index)
82 {
83 unsigned long *lock = &zram->table[index].__lock;
84
85 if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
86 mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
87 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
88 return true;
89 }
90
91 return false;
92 }
93
94 static void slot_lock(struct zram *zram, u32 index)
95 {
96 unsigned long *lock = &zram->table[index].__lock;
97
98 mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
99 wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
100 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
101 }
102
103 static void slot_unlock(struct zram *zram, u32 index)
104 {
105 unsigned long *lock = &zram->table[index].__lock;
106
107 mutex_release(slot_dep_map(zram, index), _RET_IP_);
108 clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
109 }
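/*
 * Illustrative usage sketch (not called anywhere in the driver): a caller
 * that needs to inspect or modify a slot wraps the accessors below in the
 * entry lock, e.g.:
 *
 *   slot_lock(zram, index);
 *   if (slot_allocated(zram, index) && !test_slot_flag(zram, index, ZRAM_WB))
 *           size = get_slot_size(zram, index);
 *   slot_unlock(zram, index);
 *
 * In atomic context slot_trylock() must be used instead and its return
 * value checked, per the locking rules above.
 */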
110
111 static inline bool init_done(struct zram *zram)
112 {
113 return zram->disksize;
114 }
115
116 static inline struct zram *dev_to_zram(struct device *dev)
117 {
118 return (struct zram *)dev_to_disk(dev)->private_data;
119 }
120
121 static unsigned long get_slot_handle(struct zram *zram, u32 index)
122 {
123 return zram->table[index].handle;
124 }
125
126 static void set_slot_handle(struct zram *zram, u32 index, unsigned long handle)
127 {
128 zram->table[index].handle = handle;
129 }
130
131 static bool test_slot_flag(struct zram *zram, u32 index,
132 enum zram_pageflags flag)
133 {
134 return zram->table[index].attr.flags & BIT(flag);
135 }
136
137 static void set_slot_flag(struct zram *zram, u32 index,
138 enum zram_pageflags flag)
139 {
140 zram->table[index].attr.flags |= BIT(flag);
141 }
142
143 static void clear_slot_flag(struct zram *zram, u32 index,
144 enum zram_pageflags flag)
145 {
146 zram->table[index].attr.flags &= ~BIT(flag);
147 }
148
149 static size_t get_slot_size(struct zram *zram, u32 index)
150 {
151 return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
152 }
153
154 static void set_slot_size(struct zram *zram, u32 index, size_t size)
155 {
156 unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT;
157
158 zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size;
159 }
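/*
 * Note on the layout used by the helpers above: the per-slot flags word
 * packs the compressed object size into its low ZRAM_FLAG_SHIFT bits,
 * while the bits above that hold the zram_pageflags (ZRAM_SAME, ZRAM_WB,
 * ZRAM_IDLE, ...) and the compression priority. set_slot_size() therefore
 * preserves the upper bits while rewriting the size, and get_slot_size()
 * masks everything else off.
 */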
160
161 static inline bool slot_allocated(struct zram *zram, u32 index)
162 {
163 return get_slot_size(zram, index) ||
164 test_slot_flag(zram, index, ZRAM_SAME) ||
165 test_slot_flag(zram, index, ZRAM_WB);
166 }
167
168 static inline void set_slot_comp_priority(struct zram *zram, u32 index,
169 u32 prio)
170 {
171 prio &= ZRAM_COMP_PRIORITY_MASK;
172 /*
173 	 * Clear the previous priority value first, in case we are
174 	 * recompressing an already recompressed page
175 */
176 zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
177 ZRAM_COMP_PRIORITY_BIT1);
178 zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
179 }
180
181 static inline u32 get_slot_comp_priority(struct zram *zram, u32 index)
182 {
183 u32 prio = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1;
184
185 return prio & ZRAM_COMP_PRIORITY_MASK;
186 }
187
188 static void mark_slot_accessed(struct zram *zram, u32 index)
189 {
190 clear_slot_flag(zram, index, ZRAM_IDLE);
191 clear_slot_flag(zram, index, ZRAM_PP_SLOT);
192 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
193 zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds();
194 #endif
195 }
196
197 static inline void update_used_max(struct zram *zram, const unsigned long pages)
198 {
199 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
200
201 do {
202 if (cur_max >= pages)
203 return;
204 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
205 &cur_max, pages));
206 }
207
208 static bool zram_can_store_page(struct zram *zram)
209 {
210 unsigned long alloced_pages;
211
212 alloced_pages = zs_get_total_pages(zram->mem_pool);
213 update_used_max(zram, alloced_pages);
214
215 return !zram->limit_pages || alloced_pages <= zram->limit_pages;
216 }
217
218 #if PAGE_SIZE != 4096
219 static inline bool is_partial_io(struct bio_vec *bvec)
220 {
221 return bvec->bv_len != PAGE_SIZE;
222 }
223 #define ZRAM_PARTIAL_IO 1
224 #else
225 static inline bool is_partial_io(struct bio_vec *bvec)
226 {
227 return false;
228 }
229 #endif
230
231 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
232 struct zram_pp_slot {
233 unsigned long index;
234 struct list_head entry;
235 };
236
237 /*
238  * A post-processing bucket is, essentially, a size class: this defines
239  * the range (in bytes) of pp-slot sizes in a particular bucket.
240 */
241 #define PP_BUCKET_SIZE_RANGE 64
242 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)
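/*
 * For example, with a 4K PAGE_SIZE and a 64-byte range this gives 65
 * buckets: a slot whose compressed size is 300 bytes lands in bucket
 * 300 / 64 = 4, while huge (incompressible) slots land in the last bucket.
 */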
243
244 struct zram_pp_ctl {
245 struct list_head pp_buckets[NUM_PP_BUCKETS];
246 };
247
248 static struct zram_pp_ctl *init_pp_ctl(void)
249 {
250 struct zram_pp_ctl *ctl;
251 u32 idx;
252
253 ctl = kmalloc_obj(*ctl);
254 if (!ctl)
255 return NULL;
256
257 for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
258 INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
259 return ctl;
260 }
261
262 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
263 {
264 list_del_init(&pps->entry);
265
266 slot_lock(zram, pps->index);
267 clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
268 slot_unlock(zram, pps->index);
269
270 kfree(pps);
271 }
272
273 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
274 {
275 u32 idx;
276
277 if (!ctl)
278 return;
279
280 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
281 while (!list_empty(&ctl->pp_buckets[idx])) {
282 struct zram_pp_slot *pps;
283
284 pps = list_first_entry(&ctl->pp_buckets[idx],
285 struct zram_pp_slot,
286 entry);
287 release_pp_slot(zram, pps);
288 }
289 }
290
291 kfree(ctl);
292 }
293
294 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
295 u32 index)
296 {
297 struct zram_pp_slot *pps;
298 u32 bid;
299
300 pps = kmalloc_obj(*pps, GFP_NOIO | __GFP_NOWARN);
301 if (!pps)
302 return false;
303
304 INIT_LIST_HEAD(&pps->entry);
305 pps->index = index;
306
307 bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
308 list_add(&pps->entry, &ctl->pp_buckets[bid]);
309
310 set_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
311 return true;
312 }
313
314 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
315 {
316 struct zram_pp_slot *pps = NULL;
317 s32 idx = NUM_PP_BUCKETS - 1;
318
319 	/* The higher the bucket id, the more beneficial slot post-processing is */
320 while (idx >= 0) {
321 pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
322 struct zram_pp_slot,
323 entry);
324 if (pps)
325 break;
326
327 idx--;
328 }
329 return pps;
330 }
331 #endif
332
333 static inline void zram_fill_page(void *ptr, unsigned long len,
334 unsigned long value)
335 {
336 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
337 memset_l(ptr, value, len / sizeof(unsigned long));
338 }
339
340 static bool page_same_filled(void *ptr, unsigned long *element)
341 {
342 unsigned long *page;
343 unsigned long val;
344 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
345
346 page = (unsigned long *)ptr;
347 val = page[0];
348
349 if (val != page[last_pos])
350 return false;
351
352 for (pos = 1; pos < last_pos; pos++) {
353 if (val != page[pos])
354 return false;
355 }
356
357 *element = val;
358
359 return true;
360 }
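/*
 * page_same_filled() checks the last word first, which rejects most
 * non-uniform pages after a single comparison. A page of identical words
 * (e.g. all zeroes) is later stored as a ZRAM_SAME slot, with the fill
 * value kept in the handle field instead of an allocated object.
 */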
361
362 static ssize_t initstate_show(struct device *dev, struct device_attribute *attr,
363 char *buf)
364 {
365 u32 val;
366 struct zram *zram = dev_to_zram(dev);
367
368 guard(rwsem_read)(&zram->dev_lock);
369 val = init_done(zram);
370
371 return sysfs_emit(buf, "%u\n", val);
372 }
373
374 static ssize_t disksize_show(struct device *dev,
375 struct device_attribute *attr, char *buf)
376 {
377 struct zram *zram = dev_to_zram(dev);
378
379 return sysfs_emit(buf, "%llu\n", zram->disksize);
380 }
381
382 static ssize_t mem_limit_store(struct device *dev,
383 struct device_attribute *attr, const char *buf,
384 size_t len)
385 {
386 u64 limit;
387 char *tmp;
388 struct zram *zram = dev_to_zram(dev);
389
390 limit = memparse(buf, &tmp);
391 if (buf == tmp) /* no chars parsed, invalid input */
392 return -EINVAL;
393
394 guard(rwsem_write)(&zram->dev_lock);
395 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
396
397 return len;
398 }
399
400 static ssize_t mem_used_max_store(struct device *dev,
401 struct device_attribute *attr,
402 const char *buf, size_t len)
403 {
404 int err;
405 unsigned long val;
406 struct zram *zram = dev_to_zram(dev);
407
408 err = kstrtoul(buf, 10, &val);
409 if (err || val != 0)
410 return -EINVAL;
411
412 guard(rwsem_read)(&zram->dev_lock);
413 if (init_done(zram)) {
414 atomic_long_set(&zram->stats.max_used_pages,
415 zs_get_total_pages(zram->mem_pool));
416 }
417
418 return len;
419 }
420
421 /*
422 * Mark all pages which are older than or equal to cutoff as IDLE.
423 * Callers should hold the zram init lock in read mode
424 */
425 static void mark_idle(struct zram *zram, ktime_t cutoff)
426 {
427 int is_idle = 1;
428 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
429 int index;
430
431 for (index = 0; index < nr_pages; index++) {
432 /*
433 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
434 * post-processing (recompress, writeback) happens to the
435 * ZRAM_SAME slot.
436 *
437 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
438 */
439 slot_lock(zram, index);
440 if (!slot_allocated(zram, index) ||
441 test_slot_flag(zram, index, ZRAM_WB) ||
442 test_slot_flag(zram, index, ZRAM_SAME)) {
443 slot_unlock(zram, index);
444 continue;
445 }
446
447 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
448 is_idle = !cutoff ||
449 ktime_after(cutoff, zram->table[index].attr.ac_time);
450 #endif
451 if (is_idle)
452 set_slot_flag(zram, index, ZRAM_IDLE);
453 else
454 clear_slot_flag(zram, index, ZRAM_IDLE);
455 slot_unlock(zram, index);
456 }
457 }
458
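/*
 * Example usage from user space (the second form needs
 * CONFIG_ZRAM_TRACK_ENTRY_ACTIME):
 *
 *   echo all > /sys/block/zram0/idle
 *   echo 300 > /sys/block/zram0/idle
 *
 * "all" marks every eligible slot idle; a number marks slots that have not
 * been accessed for at least that many seconds.
 */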
459 static ssize_t idle_store(struct device *dev, struct device_attribute *attr,
460 const char *buf, size_t len)
461 {
462 struct zram *zram = dev_to_zram(dev);
463 ktime_t cutoff = 0;
464
465 if (!sysfs_streq(buf, "all")) {
466 /*
467 * If it did not parse as 'all' try to treat it as an integer
468 * when we have memory tracking enabled.
469 */
470 u32 age_sec;
471
472 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) &&
473 !kstrtouint(buf, 0, &age_sec))
474 cutoff = ktime_sub((u32)ktime_get_boottime_seconds(),
475 age_sec);
476 else
477 return -EINVAL;
478 }
479
480 guard(rwsem_read)(&zram->dev_lock);
481 if (!init_done(zram))
482 return -EINVAL;
483
484 /*
485 	 * A cutoff of 0 marks everything as idle; this is the
486 * "all" behavior.
487 */
488 mark_idle(zram, cutoff);
489 return len;
490 }
491
492 #ifdef CONFIG_ZRAM_WRITEBACK
493 #define INVALID_BDEV_BLOCK (~0UL)
494
495 static int read_from_zspool_raw(struct zram *zram, struct page *page,
496 u32 index);
497 static int read_from_zspool(struct zram *zram, struct page *page, u32 index);
498
499 struct zram_wb_ctl {
500 	/* idle list is accessed only by the writeback task, no concurrency */
501 struct list_head idle_reqs;
502 /* done list is accessed concurrently, protect by done_lock */
503 struct list_head done_reqs;
504 wait_queue_head_t done_wait;
505 spinlock_t done_lock;
506 atomic_t num_inflight;
507 };
508
509 struct zram_wb_req {
510 unsigned long blk_idx;
511 struct page *page;
512 struct zram_pp_slot *pps;
513 struct bio_vec bio_vec;
514 struct bio bio;
515
516 struct list_head entry;
517 };
518
519 struct zram_rb_req {
520 struct work_struct work;
521 struct zram *zram;
522 struct page *page;
523 /* The read bio for backing device */
524 struct bio *bio;
525 unsigned long blk_idx;
526 union {
527 /* The original bio to complete (async read) */
528 struct bio *parent;
529 /* error status (sync read) */
530 int error;
531 };
532 u32 index;
533 };
534
535 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
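/*
 * bd_count/bd_reads/bd_writes are accounted in PAGE_SIZE units; FOUR_K()
 * rescales them to 4K units so bd_stat output does not depend on the page
 * size (e.g. one 16K page counts as four 4K units).
 */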
536 static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr,
537 char *buf)
538 {
539 struct zram *zram = dev_to_zram(dev);
540 ssize_t ret;
541
542 guard(rwsem_read)(&zram->dev_lock);
543 ret = sysfs_emit(buf,
544 "%8llu %8llu %8llu\n",
545 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
546 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
547 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
548
549 return ret;
550 }
551
552 static ssize_t compressed_writeback_store(struct device *dev,
553 struct device_attribute *attr,
554 const char *buf, size_t len)
555 {
556 struct zram *zram = dev_to_zram(dev);
557 bool val;
558
559 if (kstrtobool(buf, &val))
560 return -EINVAL;
561
562 guard(rwsem_write)(&zram->dev_lock);
563 if (init_done(zram)) {
564 return -EBUSY;
565 }
566
567 zram->compressed_wb = val;
568
569 return len;
570 }
571
572 static ssize_t compressed_writeback_show(struct device *dev,
573 struct device_attribute *attr,
574 char *buf)
575 {
576 bool val;
577 struct zram *zram = dev_to_zram(dev);
578
579 guard(rwsem_read)(&zram->dev_lock);
580 val = zram->compressed_wb;
581
582 return sysfs_emit(buf, "%d\n", val);
583 }
584
585 static ssize_t writeback_limit_enable_store(struct device *dev,
586 struct device_attribute *attr,
587 const char *buf, size_t len)
588 {
589 struct zram *zram = dev_to_zram(dev);
590 u64 val;
591
592 if (kstrtoull(buf, 10, &val))
593 return -EINVAL;
594
595 guard(rwsem_write)(&zram->dev_lock);
596 zram->wb_limit_enable = val;
597
598 return len;
599 }
600
601 static ssize_t writeback_limit_enable_show(struct device *dev,
602 struct device_attribute *attr,
603 char *buf)
604 {
605 bool val;
606 struct zram *zram = dev_to_zram(dev);
607
608 guard(rwsem_read)(&zram->dev_lock);
609 val = zram->wb_limit_enable;
610
611 return sysfs_emit(buf, "%d\n", val);
612 }
613
614 static ssize_t writeback_limit_store(struct device *dev,
615 struct device_attribute *attr,
616 const char *buf, size_t len)
617 {
618 struct zram *zram = dev_to_zram(dev);
619 u64 val;
620
621 if (kstrtoull(buf, 10, &val))
622 return -EINVAL;
623
624 /*
625 	 * When the page size is greater than 4KB, a bd_wb_limit value that is
626 	 * not page-size aligned causes the limit to wrap. For example, when
627 	 * the page size is 16KB and bd_wb_limit is set to 3, a single
628 	 * writeback operation will cause bd_wb_limit to become -1. Worse,
629 	 * bd_wb_limit is an unsigned number, so instead of stopping writeback
630 	 * it wraps around to a huge value.
631 */
632 val = rounddown(val, PAGE_SIZE / 4096);
633
634 guard(rwsem_write)(&zram->dev_lock);
635 zram->bd_wb_limit = val;
636
637 return len;
638 }
639
640 static ssize_t writeback_limit_show(struct device *dev,
641 struct device_attribute *attr, char *buf)
642 {
643 u64 val;
644 struct zram *zram = dev_to_zram(dev);
645
646 guard(rwsem_read)(&zram->dev_lock);
647 val = zram->bd_wb_limit;
648
649 return sysfs_emit(buf, "%llu\n", val);
650 }
651
652 static ssize_t writeback_batch_size_store(struct device *dev,
653 struct device_attribute *attr,
654 const char *buf, size_t len)
655 {
656 struct zram *zram = dev_to_zram(dev);
657 u32 val;
658
659 if (kstrtouint(buf, 10, &val))
660 return -EINVAL;
661
662 if (!val)
663 return -EINVAL;
664
665 guard(rwsem_write)(&zram->dev_lock);
666 zram->wb_batch_size = val;
667
668 return len;
669 }
670
671 static ssize_t writeback_batch_size_show(struct device *dev,
672 struct device_attribute *attr,
673 char *buf)
674 {
675 u32 val;
676 struct zram *zram = dev_to_zram(dev);
677
678 guard(rwsem_read)(&zram->dev_lock);
679 val = zram->wb_batch_size;
680
681 return sysfs_emit(buf, "%u\n", val);
682 }
683
684 static void reset_bdev(struct zram *zram)
685 {
686 if (!zram->backing_dev)
687 return;
688
689 	/* hope filp_close() flushes all of the IO */
690 filp_close(zram->backing_dev, NULL);
691 zram->backing_dev = NULL;
692 zram->bdev = NULL;
693 zram->disk->fops = &zram_devops;
694 kvfree(zram->bitmap);
695 zram->bitmap = NULL;
696 }
697
698 static ssize_t backing_dev_show(struct device *dev,
699 struct device_attribute *attr, char *buf)
700 {
701 struct file *file;
702 struct zram *zram = dev_to_zram(dev);
703 char *p;
704 ssize_t ret;
705
706 guard(rwsem_read)(&zram->dev_lock);
707 file = zram->backing_dev;
708 if (!file) {
709 memcpy(buf, "none\n", 5);
710 return 5;
711 }
712
713 p = file_path(file, buf, PAGE_SIZE - 1);
714 if (IS_ERR(p))
715 return PTR_ERR(p);
716
717 ret = strlen(p);
718 memmove(buf, p, ret);
719 buf[ret++] = '\n';
720 return ret;
721 }
722
723 static ssize_t backing_dev_store(struct device *dev,
724 struct device_attribute *attr, const char *buf,
725 size_t len)
726 {
727 char *file_name;
728 size_t sz;
729 struct file *backing_dev = NULL;
730 struct inode *inode;
731 unsigned int bitmap_sz;
732 unsigned long nr_pages, *bitmap = NULL;
733 int err;
734 struct zram *zram = dev_to_zram(dev);
735
736 file_name = kmalloc(PATH_MAX, GFP_KERNEL);
737 if (!file_name)
738 return -ENOMEM;
739
740 guard(rwsem_write)(&zram->dev_lock);
741 if (init_done(zram)) {
742 pr_info("Can't setup backing device for initialized device\n");
743 err = -EBUSY;
744 goto out;
745 }
746
747 strscpy(file_name, buf, PATH_MAX);
748 /* ignore trailing newline */
749 sz = strlen(file_name);
750 if (sz > 0 && file_name[sz - 1] == '\n')
751 file_name[sz - 1] = 0x00;
752
753 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
754 if (IS_ERR(backing_dev)) {
755 err = PTR_ERR(backing_dev);
756 backing_dev = NULL;
757 goto out;
758 }
759
760 inode = backing_dev->f_mapping->host;
761
762 	/* Only block devices are supported at the moment */
763 if (!S_ISBLK(inode->i_mode)) {
764 err = -ENOTBLK;
765 goto out;
766 }
767
768 nr_pages = i_size_read(inode) >> PAGE_SHIFT;
769 /* Refuse to use zero sized device (also prevents self reference) */
770 if (!nr_pages) {
771 err = -EINVAL;
772 goto out;
773 }
774
775 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
776 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
777 if (!bitmap) {
778 err = -ENOMEM;
779 goto out;
780 }
781
782 reset_bdev(zram);
783
784 zram->bdev = I_BDEV(inode);
785 zram->backing_dev = backing_dev;
786 zram->bitmap = bitmap;
787 zram->nr_pages = nr_pages;
788
789 pr_info("setup backing device %s\n", file_name);
790 kfree(file_name);
791
792 return len;
793 out:
794 kvfree(bitmap);
795
796 if (backing_dev)
797 filp_close(backing_dev, NULL);
798
799 kfree(file_name);
800
801 return err;
802 }
803
804 static unsigned long zram_reserve_bdev_block(struct zram *zram)
805 {
806 unsigned long blk_idx;
807
808 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0);
809 if (blk_idx == zram->nr_pages)
810 return INVALID_BDEV_BLOCK;
811
812 set_bit(blk_idx, zram->bitmap);
813 atomic64_inc(&zram->stats.bd_count);
814 return blk_idx;
815 }
816
817 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
818 {
819 int was_set;
820
821 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
822 WARN_ON_ONCE(!was_set);
823 atomic64_dec(&zram->stats.bd_count);
824 }
825
826 static void release_wb_req(struct zram_wb_req *req)
827 {
828 __free_page(req->page);
829 kfree(req);
830 }
831
832 static void release_wb_ctl(struct zram_wb_ctl *wb_ctl)
833 {
834 if (!wb_ctl)
835 return;
836
837 /* We should never have inflight requests at this point */
838 WARN_ON(atomic_read(&wb_ctl->num_inflight));
839 WARN_ON(!list_empty(&wb_ctl->done_reqs));
840
841 while (!list_empty(&wb_ctl->idle_reqs)) {
842 struct zram_wb_req *req;
843
844 req = list_first_entry(&wb_ctl->idle_reqs,
845 struct zram_wb_req, entry);
846 list_del(&req->entry);
847 release_wb_req(req);
848 }
849
850 kfree(wb_ctl);
851 }
852
853 static struct zram_wb_ctl *init_wb_ctl(struct zram *zram)
854 {
855 struct zram_wb_ctl *wb_ctl;
856 int i;
857
858 wb_ctl = kmalloc_obj(*wb_ctl);
859 if (!wb_ctl)
860 return NULL;
861
862 INIT_LIST_HEAD(&wb_ctl->idle_reqs);
863 INIT_LIST_HEAD(&wb_ctl->done_reqs);
864 atomic_set(&wb_ctl->num_inflight, 0);
865 init_waitqueue_head(&wb_ctl->done_wait);
866 spin_lock_init(&wb_ctl->done_lock);
867
868 for (i = 0; i < zram->wb_batch_size; i++) {
869 struct zram_wb_req *req;
870
871 /*
872 		 * This is a fatal condition only if we couldn't allocate
873 * any requests at all. Otherwise we just work with the
874 * requests that we have successfully allocated, so that
875 * writeback can still proceed, even if there is only one
876 * request on the idle list.
877 */
878 req = kzalloc_obj(*req, GFP_KERNEL | __GFP_NOWARN);
879 if (!req)
880 break;
881
882 req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN);
883 if (!req->page) {
884 kfree(req);
885 break;
886 }
887
888 list_add(&req->entry, &wb_ctl->idle_reqs);
889 }
890
891 	/* We couldn't allocate any requests, so writeback is not possible */
892 if (list_empty(&wb_ctl->idle_reqs))
893 goto release_wb_ctl;
894
895 return wb_ctl;
896
897 release_wb_ctl:
898 release_wb_ctl(wb_ctl);
899 return NULL;
900 }
901
902 static void zram_account_writeback_rollback(struct zram *zram)
903 {
904 lockdep_assert_held_write(&zram->dev_lock);
905
906 if (zram->wb_limit_enable)
907 zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12);
908 }
909
910 static void zram_account_writeback_submit(struct zram *zram)
911 {
912 lockdep_assert_held_write(&zram->dev_lock);
913
914 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
915 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
916 }
917
918 static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
919 {
920 u32 index = req->pps->index;
921 int err;
922
923 err = blk_status_to_errno(req->bio.bi_status);
924 if (err) {
925 /*
926 * Failed wb requests should not be accounted in wb_limit
927 * (if enabled).
928 */
929 zram_account_writeback_rollback(zram);
930 zram_release_bdev_block(zram, req->blk_idx);
931 return err;
932 }
933
934 atomic64_inc(&zram->stats.bd_writes);
935 slot_lock(zram, index);
936 /*
937 	 * We release the slot lock during writeback, so the slot can change under us:
938 	 * slot_free(), or slot_free() followed by zram_write_page(). In both cases the
939 * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can
940 * set ZRAM_PP_SLOT on such slots until current post-processing
941 * finishes.
942 */
943 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) {
944 zram_release_bdev_block(zram, req->blk_idx);
945 goto out;
946 }
947
948 clear_slot_flag(zram, index, ZRAM_IDLE);
949 if (test_slot_flag(zram, index, ZRAM_HUGE))
950 atomic64_dec(&zram->stats.huge_pages);
951 atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size);
952 zs_free(zram->mem_pool, get_slot_handle(zram, index));
953 set_slot_handle(zram, index, req->blk_idx);
954 set_slot_flag(zram, index, ZRAM_WB);
955
956 out:
957 slot_unlock(zram, index);
958 return 0;
959 }
960
961 static void zram_writeback_endio(struct bio *bio)
962 {
963 struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio);
964 struct zram_wb_ctl *wb_ctl = bio->bi_private;
965 unsigned long flags;
966
967 spin_lock_irqsave(&wb_ctl->done_lock, flags);
968 list_add(&req->entry, &wb_ctl->done_reqs);
969 spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
970
971 wake_up(&wb_ctl->done_wait);
972 }
973
974 static void zram_submit_wb_request(struct zram *zram,
975 struct zram_wb_ctl *wb_ctl,
976 struct zram_wb_req *req)
977 {
978 /*
979 * wb_limit (if enabled) should be adjusted before submission,
980 * so that we don't over-submit.
981 */
982 zram_account_writeback_submit(zram);
983 atomic_inc(&wb_ctl->num_inflight);
984 req->bio.bi_private = wb_ctl;
985 submit_bio(&req->bio);
986 }
987
988 static int zram_complete_done_reqs(struct zram *zram,
989 struct zram_wb_ctl *wb_ctl)
990 {
991 struct zram_wb_req *req;
992 unsigned long flags;
993 int ret = 0, err;
994
995 while (atomic_read(&wb_ctl->num_inflight) > 0) {
996 spin_lock_irqsave(&wb_ctl->done_lock, flags);
997 req = list_first_entry_or_null(&wb_ctl->done_reqs,
998 struct zram_wb_req, entry);
999 if (req)
1000 list_del(&req->entry);
1001 spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
1002
1003 /* ->num_inflight > 0 doesn't mean we have done requests */
1004 if (!req)
1005 break;
1006
1007 err = zram_writeback_complete(zram, req);
1008 if (err)
1009 ret = err;
1010
1011 atomic_dec(&wb_ctl->num_inflight);
1012 release_pp_slot(zram, req->pps);
1013 req->pps = NULL;
1014
1015 list_add(&req->entry, &wb_ctl->idle_reqs);
1016 }
1017
1018 return ret;
1019 }
1020
1021 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl)
1022 {
1023 struct zram_wb_req *req;
1024
1025 req = list_first_entry_or_null(&wb_ctl->idle_reqs,
1026 struct zram_wb_req, entry);
1027 if (req)
1028 list_del(&req->entry);
1029 return req;
1030 }
1031
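/*
 * Rough flow of the loop below: pick a candidate pp-slot, grab (or wait
 * for) an idle request, reserve a block on the backing device, re-read the
 * slot data into the request page, submit the write bio asynchronously,
 * and reap completed requests either while waiting for an idle request or
 * in the final drain loop.
 */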
1032 static int zram_writeback_slots(struct zram *zram,
1033 struct zram_pp_ctl *ctl,
1034 struct zram_wb_ctl *wb_ctl)
1035 {
1036 unsigned long blk_idx = INVALID_BDEV_BLOCK;
1037 struct zram_wb_req *req = NULL;
1038 struct zram_pp_slot *pps;
1039 int ret = 0, err = 0;
1040 u32 index = 0;
1041
1042 while ((pps = select_pp_slot(ctl))) {
1043 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
1044 ret = -EIO;
1045 break;
1046 }
1047
1048 while (!req) {
1049 req = zram_select_idle_req(wb_ctl);
1050 if (req)
1051 break;
1052
1053 wait_event(wb_ctl->done_wait,
1054 !list_empty(&wb_ctl->done_reqs));
1055
1056 err = zram_complete_done_reqs(zram, wb_ctl);
1057 /*
1058 * BIO errors are not fatal, we continue and simply
1059 * attempt to writeback the remaining objects (pages).
1060 * At the same time we need to signal user-space that
1061 * some writes (at least one, but also could be all of
1062 * them) were not successful and we do so by returning
1063 * the most recent BIO error.
1064 */
1065 if (err)
1066 ret = err;
1067 }
1068
1069 if (blk_idx == INVALID_BDEV_BLOCK) {
1070 blk_idx = zram_reserve_bdev_block(zram);
1071 if (blk_idx == INVALID_BDEV_BLOCK) {
1072 ret = -ENOSPC;
1073 break;
1074 }
1075 }
1076
1077 index = pps->index;
1078 slot_lock(zram, index);
1079 /*
1080 * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so
1081 * slots can change in the meantime. If slots are accessed or
1082 * freed they lose ZRAM_PP_SLOT flag and hence we don't
1083 * post-process them.
1084 */
1085 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT))
1086 goto next;
1087 if (zram->compressed_wb)
1088 err = read_from_zspool_raw(zram, req->page, index);
1089 else
1090 err = read_from_zspool(zram, req->page, index);
1091 if (err)
1092 goto next;
1093 slot_unlock(zram, index);
1094
1095 /*
1096 * From now on pp-slot is owned by the req, remove it from
1097 * its pp bucket.
1098 */
1099 list_del_init(&pps->entry);
1100
1101 req->blk_idx = blk_idx;
1102 req->pps = pps;
1103 bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE);
1104 req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
1105 req->bio.bi_end_io = zram_writeback_endio;
1106 __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0);
1107
1108 zram_submit_wb_request(zram, wb_ctl, req);
1109 blk_idx = INVALID_BDEV_BLOCK;
1110 req = NULL;
1111 cond_resched();
1112 continue;
1113
1114 next:
1115 slot_unlock(zram, index);
1116 release_pp_slot(zram, pps);
1117 }
1118
1119 /*
1120 * Selected idle req, but never submitted it due to some error or
1121 * wb limit.
1122 */
1123 if (req)
1124 release_wb_req(req);
1125
1126 while (atomic_read(&wb_ctl->num_inflight) > 0) {
1127 wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs));
1128 err = zram_complete_done_reqs(zram, wb_ctl);
1129 if (err)
1130 ret = err;
1131 }
1132
1133 return ret;
1134 }
1135
1136 #define PAGE_WRITEBACK 0
1137 #define HUGE_WRITEBACK (1 << 0)
1138 #define IDLE_WRITEBACK (1 << 1)
1139 #define INCOMPRESSIBLE_WRITEBACK (1 << 2)
1140
1141 static int parse_page_index(char *val, unsigned long nr_pages,
1142 unsigned long *lo, unsigned long *hi)
1143 {
1144 int ret;
1145
1146 ret = kstrtoul(val, 10, lo);
1147 if (ret)
1148 return ret;
1149 if (*lo >= nr_pages)
1150 return -ERANGE;
1151 *hi = *lo + 1;
1152 return 0;
1153 }
1154
1155 static int parse_page_indexes(char *val, unsigned long nr_pages,
1156 unsigned long *lo, unsigned long *hi)
1157 {
1158 char *delim;
1159 int ret;
1160
1161 delim = strchr(val, '-');
1162 if (!delim)
1163 return -EINVAL;
1164
1165 *delim = 0x00;
1166 ret = kstrtoul(val, 10, lo);
1167 if (ret)
1168 return ret;
1169 if (*lo >= nr_pages)
1170 return -ERANGE;
1171
1172 ret = kstrtoul(delim + 1, 10, hi);
1173 if (ret)
1174 return ret;
1175 if (*hi >= nr_pages || *lo > *hi)
1176 return -ERANGE;
1177 *hi += 1;
1178 return 0;
1179 }
1180
1181 static int parse_mode(char *val, u32 *mode)
1182 {
1183 *mode = 0;
1184
1185 if (!strcmp(val, "idle"))
1186 *mode = IDLE_WRITEBACK;
1187 if (!strcmp(val, "huge"))
1188 *mode = HUGE_WRITEBACK;
1189 if (!strcmp(val, "huge_idle"))
1190 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
1191 if (!strcmp(val, "incompressible"))
1192 *mode = INCOMPRESSIBLE_WRITEBACK;
1193
1194 if (*mode == 0)
1195 return -EINVAL;
1196 return 0;
1197 }
1198
1199 static void scan_slots_for_writeback(struct zram *zram, u32 mode,
1200 unsigned long lo, unsigned long hi,
1201 struct zram_pp_ctl *ctl)
1202 {
1203 u32 index = lo;
1204
1205 while (index < hi) {
1206 bool ok = true;
1207
1208 slot_lock(zram, index);
1209 if (!slot_allocated(zram, index))
1210 goto next;
1211
1212 if (test_slot_flag(zram, index, ZRAM_WB) ||
1213 test_slot_flag(zram, index, ZRAM_SAME))
1214 goto next;
1215
1216 if (mode & IDLE_WRITEBACK &&
1217 !test_slot_flag(zram, index, ZRAM_IDLE))
1218 goto next;
1219 if (mode & HUGE_WRITEBACK &&
1220 !test_slot_flag(zram, index, ZRAM_HUGE))
1221 goto next;
1222 if (mode & INCOMPRESSIBLE_WRITEBACK &&
1223 !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
1224 goto next;
1225
1226 ok = place_pp_slot(zram, ctl, index);
1227 next:
1228 slot_unlock(zram, index);
1229 if (!ok)
1230 break;
1231 index++;
1232 }
1233 }
1234
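/*
 * Example writeback requests from user space (assuming a backing device
 * has been configured):
 *
 *   echo idle > /sys/block/zram0/writeback
 *   echo type=huge_idle > /sys/block/zram0/writeback
 *   echo page_index=5 > /sys/block/zram0/writeback
 *   echo page_indexes=1-20 > /sys/block/zram0/writeback
 *
 * Both the legacy valueless form ("idle") and the key=value form
 * ("type=idle") are accepted, as handled below.
 */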
1235 static ssize_t writeback_store(struct device *dev,
1236 struct device_attribute *attr,
1237 const char *buf, size_t len)
1238 {
1239 struct zram *zram = dev_to_zram(dev);
1240 u64 nr_pages = zram->disksize >> PAGE_SHIFT;
1241 unsigned long lo = 0, hi = nr_pages;
1242 struct zram_pp_ctl *pp_ctl = NULL;
1243 struct zram_wb_ctl *wb_ctl = NULL;
1244 char *args, *param, *val;
1245 ssize_t ret = len;
1246 int err, mode = 0;
1247
1248 guard(rwsem_write)(&zram->dev_lock);
1249 if (!init_done(zram))
1250 return -EINVAL;
1251
1252 if (!zram->backing_dev)
1253 return -ENODEV;
1254
1255 pp_ctl = init_pp_ctl();
1256 if (!pp_ctl)
1257 return -ENOMEM;
1258
1259 wb_ctl = init_wb_ctl(zram);
1260 if (!wb_ctl) {
1261 ret = -ENOMEM;
1262 goto out;
1263 }
1264
1265 args = skip_spaces(buf);
1266 while (*args) {
1267 		args = next_arg(args, &param, &val);
1268
1269 /*
1270 * Workaround to support the old writeback interface.
1271 *
1272 * The old writeback interface has a minor inconsistency and
1273 * requires key=value only for page_index parameter, while the
1274 * writeback mode is a valueless parameter.
1275 *
1276 * This is not the case anymore and now all parameters are
1277 * required to have values, however, we need to support the
1278 * legacy writeback interface format so we check if we can
1279 * recognize a valueless parameter as the (legacy) writeback
1280 * mode.
1281 */
1282 if (!val || !*val) {
1283 err = parse_mode(param, &mode);
1284 if (err) {
1285 ret = err;
1286 goto out;
1287 }
1288
1289 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1290 break;
1291 }
1292
1293 if (!strcmp(param, "type")) {
1294 err = parse_mode(val, &mode);
1295 if (err) {
1296 ret = err;
1297 goto out;
1298 }
1299
1300 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1301 break;
1302 }
1303
1304 if (!strcmp(param, "page_index")) {
1305 err = parse_page_index(val, nr_pages, &lo, &hi);
1306 if (err) {
1307 ret = err;
1308 goto out;
1309 }
1310
1311 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1312 continue;
1313 }
1314
1315 if (!strcmp(param, "page_indexes")) {
1316 err = parse_page_indexes(val, nr_pages, &lo, &hi);
1317 if (err) {
1318 ret = err;
1319 goto out;
1320 }
1321
1322 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1323 continue;
1324 }
1325 }
1326
1327 err = zram_writeback_slots(zram, pp_ctl, wb_ctl);
1328 if (err)
1329 ret = err;
1330
1331 out:
1332 release_pp_ctl(zram, pp_ctl);
1333 release_wb_ctl(wb_ctl);
1334
1335 return ret;
1336 }
1337
1338 static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index)
1339 {
1340 struct zcomp_strm *zstrm;
1341 unsigned int size;
1342 int ret, prio;
1343 void *src;
1344
1345 slot_lock(zram, index);
1346 /* Since slot was unlocked we need to make sure it's still ZRAM_WB */
1347 if (!test_slot_flag(zram, index, ZRAM_WB)) {
1348 slot_unlock(zram, index);
1349 /* We read some stale data, zero it out */
1350 memset_page(page, 0, 0, PAGE_SIZE);
1351 return -EIO;
1352 }
1353
1354 if (test_slot_flag(zram, index, ZRAM_HUGE)) {
1355 slot_unlock(zram, index);
1356 return 0;
1357 }
1358
1359 size = get_slot_size(zram, index);
1360 prio = get_slot_comp_priority(zram, index);
1361
1362 zstrm = zcomp_stream_get(zram->comps[prio]);
1363 src = kmap_local_page(page);
1364 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size,
1365 zstrm->local_copy);
1366 if (!ret)
1367 copy_page(src, zstrm->local_copy);
1368 kunmap_local(src);
1369 zcomp_stream_put(zstrm);
1370 slot_unlock(zram, index);
1371
1372 return ret;
1373 }
1374
1375 static void zram_deferred_decompress(struct work_struct *w)
1376 {
1377 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
1378 struct page *page = bio_first_page_all(req->bio);
1379 struct zram *zram = req->zram;
1380 u32 index = req->index;
1381 int ret;
1382
1383 ret = decompress_bdev_page(zram, page, index);
1384 if (ret)
1385 req->parent->bi_status = BLK_STS_IOERR;
1386
1387 /* Decrement parent's ->remaining */
1388 bio_endio(req->parent);
1389 bio_put(req->bio);
1390 kfree(req);
1391 }
1392
1393 static void zram_async_read_endio(struct bio *bio)
1394 {
1395 struct zram_rb_req *req = bio->bi_private;
1396 struct zram *zram = req->zram;
1397
1398 if (bio->bi_status) {
1399 req->parent->bi_status = bio->bi_status;
1400 bio_endio(req->parent);
1401 bio_put(bio);
1402 kfree(req);
1403 return;
1404 }
1405
1406 /*
1407 	 * NOTE: zram_async_read_endio() is not exactly the right place for this.
1408 	 * Ideally, we would do it after the ZRAM_WB check, but that requires
1409 * us to use wq path even on systems that don't enable compressed
1410 * writeback, because we cannot take slot-lock in the current context.
1411 *
1412 * Keep the existing behavior for now.
1413 */
1414 if (zram->compressed_wb == false) {
1415 /* No decompression needed, complete the parent IO */
1416 bio_endio(req->parent);
1417 bio_put(bio);
1418 kfree(req);
1419 return;
1420 }
1421
1422 /*
1423 	 * zram decompression is sleepable, so we need to defer it to
1424 * a preemptible context.
1425 */
1426 INIT_WORK(&req->work, zram_deferred_decompress);
1427 queue_work(system_highpri_wq, &req->work);
1428 }
1429
1430 static int read_from_bdev_async(struct zram *zram, struct page *page,
1431 u32 index, unsigned long blk_idx,
1432 struct bio *parent)
1433 {
1434 struct zram_rb_req *req;
1435 struct bio *bio;
1436
1437 req = kmalloc_obj(*req, GFP_NOIO);
1438 if (!req)
1439 return -ENOMEM;
1440
1441 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
1442 if (!bio) {
1443 kfree(req);
1444 return -ENOMEM;
1445 }
1446
1447 req->zram = zram;
1448 req->index = index;
1449 req->blk_idx = blk_idx;
1450 req->bio = bio;
1451 req->parent = parent;
1452
1453 bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
1454 bio->bi_private = req;
1455 bio->bi_end_io = zram_async_read_endio;
1456
1457 __bio_add_page(bio, page, PAGE_SIZE, 0);
1458 bio_inc_remaining(parent);
1459 submit_bio(bio);
1460
1461 return 0;
1462 }
1463
1464 static void zram_sync_read(struct work_struct *w)
1465 {
1466 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
1467 struct bio_vec bv;
1468 struct bio bio;
1469
1470 bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ);
1471 bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
1472 __bio_add_page(&bio, req->page, PAGE_SIZE, 0);
1473 req->error = submit_bio_wait(&bio);
1474 }
1475
1476 /*
1477  * The block layer wants one ->submit_bio to be active at a time, so if we use
1478  * chained IO with the parent IO in the same context, it deadlocks. To avoid that,
1479 * use a worker thread context.
1480 */
1481 static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index,
1482 unsigned long blk_idx)
1483 {
1484 struct zram_rb_req req;
1485
1486 req.page = page;
1487 req.zram = zram;
1488 req.blk_idx = blk_idx;
1489
1490 INIT_WORK_ONSTACK(&req.work, zram_sync_read);
1491 queue_work(system_dfl_wq, &req.work);
1492 flush_work(&req.work);
1493 destroy_work_on_stack(&req.work);
1494
1495 if (req.error || zram->compressed_wb == false)
1496 return req.error;
1497
1498 return decompress_bdev_page(zram, page, index);
1499 }
1500
1501 static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
1502 unsigned long blk_idx, struct bio *parent)
1503 {
1504 atomic64_inc(&zram->stats.bd_reads);
1505 if (!parent) {
1506 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
1507 return -EIO;
1508 return read_from_bdev_sync(zram, page, index, blk_idx);
1509 }
1510 return read_from_bdev_async(zram, page, index, blk_idx, parent);
1511 }
1512 #else
1513 static inline void reset_bdev(struct zram *zram) {};
1514 static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
1515 unsigned long blk_idx, struct bio *parent)
1516 {
1517 return -EIO;
1518 }
1519
1520 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
1521 {
1522 }
1523 #endif
1524
1525 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1526
1527 static struct dentry *zram_debugfs_root;
1528
1529 static void zram_debugfs_create(void)
1530 {
1531 zram_debugfs_root = debugfs_create_dir("zram", NULL);
1532 }
1533
1534 static void zram_debugfs_destroy(void)
1535 {
1536 debugfs_remove_recursive(zram_debugfs_root);
1537 }
1538
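/*
 * block_state emits one line per allocated slot:
 *
 *   <index> <ac_time> s w h i r n
 *
 * where the trailing characters flag same-filled, written-back, huge, idle,
 * recompressed and incompressible slots respectively ('.' means the flag is
 * not set).
 */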
1539 static ssize_t read_block_state(struct file *file, char __user *buf,
1540 size_t count, loff_t *ppos)
1541 {
1542 char *kbuf;
1543 ssize_t index, written = 0;
1544 struct zram *zram = file->private_data;
1545 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
1546
1547 kbuf = kvmalloc(count, GFP_KERNEL);
1548 if (!kbuf)
1549 return -ENOMEM;
1550
1551 guard(rwsem_read)(&zram->dev_lock);
1552 if (!init_done(zram)) {
1553 kvfree(kbuf);
1554 return -EINVAL;
1555 }
1556
1557 for (index = *ppos; index < nr_pages; index++) {
1558 int copied;
1559
1560 slot_lock(zram, index);
1561 if (!slot_allocated(zram, index))
1562 goto next;
1563
1564 copied = snprintf(kbuf + written, count,
1565 "%12zd %12u.%06d %c%c%c%c%c%c\n",
1566 index, zram->table[index].attr.ac_time, 0,
1567 test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.',
1568 test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.',
1569 test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
1570 test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
1571 get_slot_comp_priority(zram, index) ? 'r' : '.',
1572 test_slot_flag(zram, index,
1573 ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
1574
1575 if (count <= copied) {
1576 slot_unlock(zram, index);
1577 break;
1578 }
1579 written += copied;
1580 count -= copied;
1581 next:
1582 slot_unlock(zram, index);
1583 *ppos += 1;
1584 }
1585
1586 if (copy_to_user(buf, kbuf, written))
1587 written = -EFAULT;
1588 kvfree(kbuf);
1589
1590 return written;
1591 }
1592
1593 static const struct file_operations proc_zram_block_state_op = {
1594 .open = simple_open,
1595 .read = read_block_state,
1596 .llseek = default_llseek,
1597 };
1598
1599 static void zram_debugfs_register(struct zram *zram)
1600 {
1601 if (!zram_debugfs_root)
1602 return;
1603
1604 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
1605 zram_debugfs_root);
1606 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
1607 zram, &proc_zram_block_state_op);
1608 }
1609
1610 static void zram_debugfs_unregister(struct zram *zram)
1611 {
1612 debugfs_remove_recursive(zram->debugfs_dir);
1613 }
1614 #else
1615 static void zram_debugfs_create(void) {};
1616 static void zram_debugfs_destroy(void) {};
1617 static void zram_debugfs_register(struct zram *zram) {};
1618 static void zram_debugfs_unregister(struct zram *zram) {};
1619 #endif
1620
1621 /* Only algo parameter given, lookup by algo name */
1622 static int lookup_algo_priority(struct zram *zram, const char *algo,
1623 u32 min_prio)
1624 {
1625 s32 prio;
1626
1627 for (prio = min_prio; prio < ZRAM_MAX_COMPS; prio++) {
1628 if (!zram->comp_algs[prio])
1629 continue;
1630
1631 if (!strcmp(zram->comp_algs[prio], algo))
1632 return prio;
1633 }
1634
1635 return -EINVAL;
1636 }
1637
1638 /* Both algo and priority parameters given, validate them */
1639 static int validate_algo_priority(struct zram *zram, const char *algo, u32 prio)
1640 {
1641 if (prio >= ZRAM_MAX_COMPS)
1642 return -EINVAL;
1643 /* No algo at given priority */
1644 if (!zram->comp_algs[prio])
1645 return -EINVAL;
1646 /* A different algo at given priority */
1647 if (strcmp(zram->comp_algs[prio], algo))
1648 return -EINVAL;
1649 return 0;
1650 }
1651
1652 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
1653 {
1654 zram->comp_algs[prio] = alg;
1655 }
1656
1657 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
1658 {
1659 const char *alg;
1660 size_t sz;
1661
1662 sz = strlen(buf);
1663 if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
1664 return -E2BIG;
1665
1666 alg = zcomp_lookup_backend_name(buf);
1667 if (!alg)
1668 return -EINVAL;
1669
1670 guard(rwsem_write)(&zram->dev_lock);
1671 if (init_done(zram)) {
1672 pr_info("Can't change algorithm for initialized device\n");
1673 return -EBUSY;
1674 }
1675
1676 comp_algorithm_set(zram, prio, alg);
1677 return 0;
1678 }
1679
1680 static void comp_params_reset(struct zram *zram, u32 prio)
1681 {
1682 struct zcomp_params *params = &zram->params[prio];
1683
1684 vfree(params->dict);
1685 params->level = ZCOMP_PARAM_NOT_SET;
1686 params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
1687 params->dict_sz = 0;
1688 params->dict = NULL;
1689 }
1690
1691 static int comp_params_store(struct zram *zram, u32 prio, s32 level,
1692 const char *dict_path,
1693 struct deflate_params *deflate_params)
1694 {
1695 ssize_t sz = 0;
1696
1697 comp_params_reset(zram, prio);
1698
1699 if (dict_path) {
1700 sz = kernel_read_file_from_path(dict_path, 0,
1701 &zram->params[prio].dict,
1702 INT_MAX,
1703 NULL,
1704 READING_POLICY);
1705 if (sz < 0)
1706 return -EINVAL;
1707 }
1708
1709 zram->params[prio].dict_sz = sz;
1710 zram->params[prio].level = level;
1711 zram->params[prio].deflate.winbits = deflate_params->winbits;
1712 return 0;
1713 }
1714
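/*
 * Example usage from user space (the device must not be initialized yet and
 * the named algorithm must be selected at some priority; the dictionary
 * path below is made up):
 *
 *   echo "algo=zstd level=8" > /sys/block/zram0/algorithm_params
 *   echo "priority=0 level=8 dict=/etc/zram/dict" > /sys/block/zram0/algorithm_params
 *
 * Either the algorithm name or an explicit priority selects which
 * compression stream the parameters apply to.
 */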
1715 static ssize_t algorithm_params_store(struct device *dev,
1716 struct device_attribute *attr,
1717 const char *buf,
1718 size_t len)
1719 {
1720 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
1721 char *args, *param, *val, *algo = NULL, *dict_path = NULL;
1722 struct deflate_params deflate_params;
1723 struct zram *zram = dev_to_zram(dev);
1724 bool prio_param = false;
1725 int ret;
1726
1727 deflate_params.winbits = ZCOMP_PARAM_NOT_SET;
1728
1729 args = skip_spaces(buf);
1730 while (*args) {
1731 		args = next_arg(args, &param, &val);
1732
1733 if (!val || !*val)
1734 return -EINVAL;
1735
1736 if (!strcmp(param, "priority")) {
1737 prio_param = true;
1738 ret = kstrtoint(val, 10, &prio);
1739 if (ret)
1740 return ret;
1741 continue;
1742 }
1743
1744 if (!strcmp(param, "level")) {
1745 ret = kstrtoint(val, 10, &level);
1746 if (ret)
1747 return ret;
1748 continue;
1749 }
1750
1751 if (!strcmp(param, "algo")) {
1752 algo = val;
1753 continue;
1754 }
1755
1756 if (!strcmp(param, "dict")) {
1757 dict_path = val;
1758 continue;
1759 }
1760
1761 if (!strcmp(param, "deflate.winbits")) {
1762 ret = kstrtoint(val, 10, &deflate_params.winbits);
1763 if (ret)
1764 return ret;
1765 continue;
1766 }
1767 }
1768
1769 guard(rwsem_write)(&zram->dev_lock);
1770 if (init_done(zram))
1771 return -EBUSY;
1772
1773 if (prio_param) {
1774 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
1775 return -EINVAL;
1776 }
1777
1778 if (algo && prio_param) {
1779 ret = validate_algo_priority(zram, algo, prio);
1780 if (ret)
1781 return ret;
1782 }
1783
1784 if (algo && !prio_param) {
1785 prio = lookup_algo_priority(zram, algo, ZRAM_PRIMARY_COMP);
1786 if (prio < 0)
1787 return -EINVAL;
1788 }
1789
1790 ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
1791 return ret ? ret : len;
1792 }
1793
1794 static ssize_t comp_algorithm_show(struct device *dev,
1795 struct device_attribute *attr,
1796 char *buf)
1797 {
1798 struct zram *zram = dev_to_zram(dev);
1799 ssize_t sz;
1800
1801 guard(rwsem_read)(&zram->dev_lock);
1802 sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
1803 return sz;
1804 }
1805
1806 static ssize_t comp_algorithm_store(struct device *dev,
1807 struct device_attribute *attr,
1808 const char *buf,
1809 size_t len)
1810 {
1811 struct zram *zram = dev_to_zram(dev);
1812 int ret;
1813
1814 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
1815 return ret ? ret : len;
1816 }
1817
1818 #ifdef CONFIG_ZRAM_MULTI_COMP
1819 static ssize_t recomp_algorithm_show(struct device *dev,
1820 struct device_attribute *attr,
1821 char *buf)
1822 {
1823 struct zram *zram = dev_to_zram(dev);
1824 ssize_t sz = 0;
1825 u32 prio;
1826
1827 guard(rwsem_read)(&zram->dev_lock);
1828 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
1829 if (!zram->comp_algs[prio])
1830 continue;
1831
1832 sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
1833 sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
1834 }
1835 return sz;
1836 }
1837
1838 static ssize_t recomp_algorithm_store(struct device *dev,
1839 struct device_attribute *attr,
1840 const char *buf,
1841 size_t len)
1842 {
1843 struct zram *zram = dev_to_zram(dev);
1844 int prio = ZRAM_SECONDARY_COMP;
1845 char *args, *param, *val;
1846 char *alg = NULL;
1847 int ret;
1848
1849 args = skip_spaces(buf);
1850 while (*args) {
1851 		args = next_arg(args, &param, &val);
1852
1853 if (!val || !*val)
1854 return -EINVAL;
1855
1856 if (!strcmp(param, "algo")) {
1857 alg = val;
1858 continue;
1859 }
1860
1861 if (!strcmp(param, "priority")) {
1862 ret = kstrtoint(val, 10, &prio);
1863 if (ret)
1864 return ret;
1865 continue;
1866 }
1867 }
1868
1869 if (!alg)
1870 return -EINVAL;
1871
1872 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
1873 return -EINVAL;
1874
1875 ret = __comp_algorithm_store(zram, prio, alg);
1876 return ret ? ret : len;
1877 }
1878 #endif
1879
1880 static ssize_t compact_store(struct device *dev, struct device_attribute *attr,
1881 const char *buf, size_t len)
1882 {
1883 struct zram *zram = dev_to_zram(dev);
1884
1885 guard(rwsem_read)(&zram->dev_lock);
1886 if (!init_done(zram))
1887 return -EINVAL;
1888
1889 zs_compact(zram->mem_pool);
1890
1891 return len;
1892 }
1893
1894 static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr,
1895 char *buf)
1896 {
1897 struct zram *zram = dev_to_zram(dev);
1898 ssize_t ret;
1899
1900 guard(rwsem_read)(&zram->dev_lock);
1901 ret = sysfs_emit(buf,
1902 "%8llu %8llu 0 %8llu\n",
1903 (u64)atomic64_read(&zram->stats.failed_reads),
1904 (u64)atomic64_read(&zram->stats.failed_writes),
1905 (u64)atomic64_read(&zram->stats.notify_free));
1906
1907 return ret;
1908 }
1909
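/*
 * mm_stat columns, in order: orig_data_size, compr_data_size,
 * mem_used_total, mem_limit, mem_used_max, same_pages, pages_compacted,
 * huge_pages, huge_pages_since. The first five are byte values, the rest
 * are page/slot counts.
 */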
1910 static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr,
1911 char *buf)
1912 {
1913 struct zram *zram = dev_to_zram(dev);
1914 struct zs_pool_stats pool_stats;
1915 u64 orig_size, mem_used = 0;
1916 long max_used;
1917 ssize_t ret;
1918
1919 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1920
1921 guard(rwsem_read)(&zram->dev_lock);
1922 if (init_done(zram)) {
1923 mem_used = zs_get_total_pages(zram->mem_pool);
1924 zs_pool_stats(zram->mem_pool, &pool_stats);
1925 }
1926
1927 orig_size = atomic64_read(&zram->stats.pages_stored);
1928 max_used = atomic_long_read(&zram->stats.max_used_pages);
1929
1930 ret = sysfs_emit(buf,
1931 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1932 orig_size << PAGE_SHIFT,
1933 (u64)atomic64_read(&zram->stats.compr_data_size),
1934 mem_used << PAGE_SHIFT,
1935 zram->limit_pages << PAGE_SHIFT,
1936 max_used << PAGE_SHIFT,
1937 (u64)atomic64_read(&zram->stats.same_pages),
1938 atomic_long_read(&pool_stats.pages_compacted),
1939 (u64)atomic64_read(&zram->stats.huge_pages),
1940 (u64)atomic64_read(&zram->stats.huge_pages_since));
1941
1942 return ret;
1943 }
1944
1945 static ssize_t debug_stat_show(struct device *dev,
1946 struct device_attribute *attr, char *buf)
1947 {
1948 int version = 1;
1949 struct zram *zram = dev_to_zram(dev);
1950 ssize_t ret;
1951
1952 guard(rwsem_read)(&zram->dev_lock);
1953 ret = sysfs_emit(buf,
1954 "version: %d\n0 %8llu\n",
1955 version,
1956 (u64)atomic64_read(&zram->stats.miss_free));
1957
1958 return ret;
1959 }
1960
1961 static void zram_meta_free(struct zram *zram, u64 disksize)
1962 {
1963 size_t num_pages = disksize >> PAGE_SHIFT;
1964 size_t index;
1965
1966 if (!zram->table)
1967 return;
1968
1969 /* Free all pages that are still in this zram device */
1970 for (index = 0; index < num_pages; index++)
1971 slot_free(zram, index);
1972
1973 zs_destroy_pool(zram->mem_pool);
1974 vfree(zram->table);
1975 zram->table = NULL;
1976 }
1977
1978 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1979 {
1980 size_t num_pages, index;
1981
1982 num_pages = disksize >> PAGE_SHIFT;
1983 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1984 if (!zram->table)
1985 return false;
1986
1987 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1988 if (!zram->mem_pool) {
1989 vfree(zram->table);
1990 zram->table = NULL;
1991 return false;
1992 }
1993
1994 if (!huge_class_size)
1995 huge_class_size = zs_huge_class_size(zram->mem_pool);
1996
1997 for (index = 0; index < num_pages; index++)
1998 slot_lock_init(zram, index);
1999
2000 return true;
2001 }
2002
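/*
 * Release everything associated with a slot: clear post-processing and
 * access flags, drop ZRAM_HUGE/ZRAM_WB/ZRAM_SAME state (returning any
 * backing-device block), free the zsmalloc object if one was allocated,
 * and update the relevant counters. Callers normally hold the slot lock;
 * zram_meta_free() is the teardown-time exception.
 */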
2003 static void slot_free(struct zram *zram, u32 index)
2004 {
2005 unsigned long handle;
2006
2007 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
2008 zram->table[index].attr.ac_time = 0;
2009 #endif
2010
2011 clear_slot_flag(zram, index, ZRAM_IDLE);
2012 clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
2013 clear_slot_flag(zram, index, ZRAM_PP_SLOT);
2014 set_slot_comp_priority(zram, index, 0);
2015
2016 if (test_slot_flag(zram, index, ZRAM_HUGE)) {
2017 /*
2018 * Writeback completion decrements ->huge_pages but keeps the
2019 * ZRAM_HUGE flag for the deferred decompression path.
2020 */
2021 if (!test_slot_flag(zram, index, ZRAM_WB))
2022 atomic64_dec(&zram->stats.huge_pages);
2023 clear_slot_flag(zram, index, ZRAM_HUGE);
2024 }
2025
2026 if (test_slot_flag(zram, index, ZRAM_WB)) {
2027 clear_slot_flag(zram, index, ZRAM_WB);
2028 zram_release_bdev_block(zram, get_slot_handle(zram, index));
2029 goto out;
2030 }
2031
2032 /*
2033 * No memory is allocated for same-element-filled pages.
2034 * Simply clear the same-page flag.
2035 */
2036 if (test_slot_flag(zram, index, ZRAM_SAME)) {
2037 clear_slot_flag(zram, index, ZRAM_SAME);
2038 atomic64_dec(&zram->stats.same_pages);
2039 goto out;
2040 }
2041
2042 handle = get_slot_handle(zram, index);
2043 if (!handle)
2044 return;
2045
2046 zs_free(zram->mem_pool, handle);
2047
2048 atomic64_sub(get_slot_size(zram, index),
2049 &zram->stats.compr_data_size);
2050 out:
2051 atomic64_dec(&zram->stats.pages_stored);
2052 set_slot_handle(zram, index, 0);
2053 set_slot_size(zram, index, 0);
2054 }
2055
2056 static int read_same_filled_page(struct zram *zram, struct page *page,
2057 u32 index)
2058 {
2059 void *mem;
2060
2061 mem = kmap_local_page(page);
2062 zram_fill_page(mem, PAGE_SIZE, get_slot_handle(zram, index));
2063 kunmap_local(mem);
2064 return 0;
2065 }
2066
2067 static int read_incompressible_page(struct zram *zram, struct page *page,
2068 u32 index)
2069 {
2070 unsigned long handle;
2071 void *src, *dst;
2072
2073 handle = get_slot_handle(zram, index);
2074 src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL);
2075 dst = kmap_local_page(page);
2076 copy_page(dst, src);
2077 kunmap_local(dst);
2078 zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src);
2079
2080 return 0;
2081 }
2082
2083 static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
2084 {
2085 struct zcomp_strm *zstrm;
2086 unsigned long handle;
2087 unsigned int size;
2088 void *src, *dst;
2089 int ret, prio;
2090
2091 handle = get_slot_handle(zram, index);
2092 size = get_slot_size(zram, index);
2093 prio = get_slot_comp_priority(zram, index);
2094
2095 zstrm = zcomp_stream_get(zram->comps[prio]);
2096 src = zs_obj_read_begin(zram->mem_pool, handle, size,
2097 zstrm->local_copy);
2098 dst = kmap_local_page(page);
2099 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
2100 kunmap_local(dst);
2101 zs_obj_read_end(zram->mem_pool, handle, size, src);
2102 zcomp_stream_put(zstrm);
2103
2104 return ret;
2105 }
2106
2107 #if defined CONFIG_ZRAM_WRITEBACK
2108 static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
2109 {
2110 struct zcomp_strm *zstrm;
2111 unsigned long handle;
2112 unsigned int size;
2113 void *src;
2114
2115 handle = get_slot_handle(zram, index);
2116 size = get_slot_size(zram, index);
2117
2118 /*
2119 * We need to get the stream just for its ->local_copy buffer, in
2120 * case the object spans two physical pages. No decompression
2121 * takes place here, as we read raw compressed data.
2122 */
2123 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
2124 src = zs_obj_read_begin(zram->mem_pool, handle, size,
2125 zstrm->local_copy);
2126 memcpy_to_page(page, 0, src, size);
2127 zs_obj_read_end(zram->mem_pool, handle, size, src);
2128 zcomp_stream_put(zstrm);
2129
2130 return 0;
2131 }
2132 #endif
2133
2134 /*
2135 * Reads (decompresses if needed) a page from zspool (zsmalloc).
2136 * Corresponding ZRAM slot should be locked.
2137 */
2138 static int read_from_zspool(struct zram *zram, struct page *page, u32 index)
2139 {
2140 if (test_slot_flag(zram, index, ZRAM_SAME) ||
2141 !get_slot_handle(zram, index))
2142 return read_same_filled_page(zram, page, index);
2143
2144 if (!test_slot_flag(zram, index, ZRAM_HUGE))
2145 return read_compressed_page(zram, page, index);
2146 else
2147 return read_incompressible_page(zram, page, index);
2148 }
2149
2150 static int zram_read_page(struct zram *zram, struct page *page, u32 index,
2151 struct bio *parent)
2152 {
2153 int ret;
2154
2155 slot_lock(zram, index);
2156 if (!test_slot_flag(zram, index, ZRAM_WB)) {
2157 /* Slot should be locked throughout the function call */
2158 ret = read_from_zspool(zram, page, index);
2159 slot_unlock(zram, index);
2160 } else {
2161 unsigned long blk_idx = get_slot_handle(zram, index);
2162
2163 /*
2164 * The slot should be unlocked before reading from the backing
2165 * device.
2166 */
2167 slot_unlock(zram, index);
2168 ret = read_from_bdev(zram, page, index, blk_idx, parent);
2169 }
2170
2171 /* Should NEVER happen. Return bio error if it does. */
2172 if (WARN_ON(ret < 0))
2173 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
2174
2175 return ret;
2176 }
2177
2178 /*
2179 * Use a temporary buffer to decompress the page, as the decompressor
2180 * always expects a full page for the output.
2181 */
2182 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
2183 u32 index, int offset)
2184 {
2185 struct page *page = alloc_page(GFP_NOIO);
2186 int ret;
2187
2188 if (!page)
2189 return -ENOMEM;
2190 ret = zram_read_page(zram, page, index, NULL);
2191 if (likely(!ret))
2192 memcpy_to_bvec(bvec, page_address(page) + offset);
2193 __free_page(page);
2194 return ret;
2195 }
2196
2197 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
2198 u32 index, int offset, struct bio *bio)
2199 {
2200 if (is_partial_io(bvec))
2201 return zram_bvec_read_partial(zram, bvec, index, offset);
2202 return zram_read_page(zram, bvec->bv_page, index, bio);
2203 }
2204
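/*
 * Same-element-filled pages are not stored in zsmalloc at all: the fill
 * value is recorded directly in the slot's handle field and the slot is
 * marked ZRAM_SAME, so reads can reconstruct the page from that value.
 */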
2205 static int write_same_filled_page(struct zram *zram, unsigned long fill,
2206 u32 index)
2207 {
2208 slot_lock(zram, index);
2209 slot_free(zram, index);
2210 set_slot_flag(zram, index, ZRAM_SAME);
2211 set_slot_handle(zram, index, fill);
2212 slot_unlock(zram, index);
2213
2214 atomic64_inc(&zram->stats.same_pages);
2215 atomic64_inc(&zram->stats.pages_stored);
2216
2217 return 0;
2218 }
2219
2220 static int write_incompressible_page(struct zram *zram, struct page *page,
2221 u32 index)
2222 {
2223 unsigned long handle;
2224 void *src;
2225
2226 /*
2227 * This function is called from a preemptible context, so we don't
2228 * need to do an optimistic allocation and then fall back to a
2229 * pessimistic one, like we do for compressible pages.
2230 */
2231 handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
2232 GFP_NOIO | __GFP_NOWARN |
2233 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2234 if (IS_ERR_VALUE(handle))
2235 return PTR_ERR((void *)handle);
2236
2237 if (!zram_can_store_page(zram)) {
2238 zs_free(zram->mem_pool, handle);
2239 return -ENOMEM;
2240 }
2241
2242 src = kmap_local_page(page);
2243 zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
2244 kunmap_local(src);
2245
2246 slot_lock(zram, index);
2247 slot_free(zram, index);
2248 set_slot_flag(zram, index, ZRAM_HUGE);
2249 set_slot_handle(zram, index, handle);
2250 set_slot_size(zram, index, PAGE_SIZE);
2251 slot_unlock(zram, index);
2252
2253 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
2254 atomic64_inc(&zram->stats.huge_pages);
2255 atomic64_inc(&zram->stats.huge_pages_since);
2256 atomic64_inc(&zram->stats.pages_stored);
2257
2258 return 0;
2259 }
2260
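/*
 * Write path for a full page: detect same-element-filled pages first,
 * then compress with the primary algorithm. If the result is not smaller
 * than huge_class_size the page is stored uncompressed as ZRAM_HUGE,
 * otherwise a zsmalloc object of the compressed length is allocated and
 * the compressed data is written into it.
 */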
2261 static int zram_write_page(struct zram *zram, struct page *page, u32 index)
2262 {
2263 int ret = 0;
2264 unsigned long handle;
2265 unsigned int comp_len;
2266 void *mem;
2267 struct zcomp_strm *zstrm;
2268 unsigned long element;
2269 bool same_filled;
2270
2271 mem = kmap_local_page(page);
2272 same_filled = page_same_filled(mem, &element);
2273 kunmap_local(mem);
2274 if (same_filled)
2275 return write_same_filled_page(zram, element, index);
2276
2277 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
2278 mem = kmap_local_page(page);
2279 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
2280 mem, &comp_len);
2281 kunmap_local(mem);
2282
2283 if (unlikely(ret)) {
2284 zcomp_stream_put(zstrm);
2285 pr_err("Compression failed! err=%d\n", ret);
2286 return ret;
2287 }
2288
2289 if (comp_len >= huge_class_size) {
2290 zcomp_stream_put(zstrm);
2291 return write_incompressible_page(zram, page, index);
2292 }
2293
2294 handle = zs_malloc(zram->mem_pool, comp_len,
2295 GFP_NOIO | __GFP_NOWARN |
2296 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2297 if (IS_ERR_VALUE(handle)) {
2298 zcomp_stream_put(zstrm);
2299 return PTR_ERR((void *)handle);
2300 }
2301
2302 if (!zram_can_store_page(zram)) {
2303 zcomp_stream_put(zstrm);
2304 zs_free(zram->mem_pool, handle);
2305 return -ENOMEM;
2306 }
2307
2308 zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
2309 zcomp_stream_put(zstrm);
2310
2311 slot_lock(zram, index);
2312 slot_free(zram, index);
2313 set_slot_handle(zram, index, handle);
2314 set_slot_size(zram, index, comp_len);
2315 slot_unlock(zram, index);
2316
2317 /* Update stats */
2318 atomic64_inc(&zram->stats.pages_stored);
2319 atomic64_add(comp_len, &zram->stats.compr_data_size);
2320
2321 return ret;
2322 }
2323
2324 /*
2325 * This is a partial IO. Read the full page before writing the changes.
2326 */
2327 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
2328 u32 index, int offset, struct bio *bio)
2329 {
2330 struct page *page = alloc_page(GFP_NOIO);
2331 int ret;
2332
2333 if (!page)
2334 return -ENOMEM;
2335
2336 ret = zram_read_page(zram, page, index, bio);
2337 if (!ret) {
2338 memcpy_from_bvec(page_address(page) + offset, bvec);
2339 ret = zram_write_page(zram, page, index);
2340 }
2341 __free_page(page);
2342 return ret;
2343 }
2344
2345 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
2346 u32 index, int offset, struct bio *bio)
2347 {
2348 if (is_partial_io(bvec))
2349 return zram_bvec_write_partial(zram, bvec, index, offset, bio);
2350 return zram_write_page(zram, bvec->bv_page, index);
2351 }
2352
2353 #ifdef CONFIG_ZRAM_MULTI_COMP
2354 #define RECOMPRESS_IDLE (1 << 0)
2355 #define RECOMPRESS_HUGE (1 << 1)
2356
2357 static bool highest_priority_algorithm(struct zram *zram, u32 prio)
2358 {
2359 u32 p;
2360
2361 for (p = prio + 1; p < ZRAM_MAX_COMPS; p++) {
2362 if (zram->comp_algs[p])
2363 return false;
2364 }
2365
2366 return true;
2367 }
2368
2369 static void scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio,
2370 struct zram_pp_ctl *ctl)
2371 {
2372 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
2373 unsigned long index;
2374
2375 for (index = 0; index < nr_pages; index++) {
2376 bool ok = true;
2377
2378 slot_lock(zram, index);
2379 if (!slot_allocated(zram, index))
2380 goto next;
2381
2382 if (mode & RECOMPRESS_IDLE &&
2383 !test_slot_flag(zram, index, ZRAM_IDLE))
2384 goto next;
2385
2386 if (mode & RECOMPRESS_HUGE &&
2387 !test_slot_flag(zram, index, ZRAM_HUGE))
2388 goto next;
2389
2390 if (test_slot_flag(zram, index, ZRAM_WB) ||
2391 test_slot_flag(zram, index, ZRAM_SAME) ||
2392 test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
2393 goto next;
2394
2395 /* Already compressed with same or higher priority */
2396 if (get_slot_comp_priority(zram, index) >= prio)
2397 goto next;
2398
2399 ok = place_pp_slot(zram, ctl, index);
2400 next:
2401 slot_unlock(zram, index);
2402 if (!ok)
2403 break;
2404 }
2405 }
2406
2407 /*
2408 * This function will decompress the page (unless it's ZRAM_HUGE) and then
2409 * attempt to compress it using the compression algorithm of the provided
2410 * priority (which is potentially more effective).
2411 *
2412 * Corresponding ZRAM slot should be locked.
2413 */
2414 static int recompress_slot(struct zram *zram, u32 index, struct page *page,
2415 u64 *num_recomp_pages, u32 threshold, u32 prio)
2416 {
2417 struct zcomp_strm *zstrm = NULL;
2418 unsigned long handle_old;
2419 unsigned long handle_new;
2420 unsigned int comp_len_old;
2421 unsigned int comp_len_new;
2422 unsigned int class_index_old;
2423 unsigned int class_index_new;
2424 void *src;
2425 int ret = 0;
2426
2427 handle_old = get_slot_handle(zram, index);
2428 if (!handle_old)
2429 return -EINVAL;
2430
2431 comp_len_old = get_slot_size(zram, index);
2432 /*
2433 * Do not recompress objects that are already "small enough".
2434 */
2435 if (comp_len_old < threshold)
2436 return 0;
2437
2438 ret = read_from_zspool(zram, page, index);
2439 if (ret)
2440 return ret;
2441
2442 /*
2443 * We touched this entry so mark it as non-IDLE. This makes sure that
2444 * we don't preserve IDLE flag and don't incorrectly pick this entry
2445 * for different post-processing type (e.g. writeback).
2446 */
2447 clear_slot_flag(zram, index, ZRAM_IDLE);
2448
2449 zstrm = zcomp_stream_get(zram->comps[prio]);
2450 src = kmap_local_page(page);
2451 ret = zcomp_compress(zram->comps[prio], zstrm, src, &comp_len_new);
2452 kunmap_local(src);
2453
2454 /*
2455 * Decrement the limit (if set) on pages we can recompress, even
2456 * when current recompression was unsuccessful or did not compress
2457 * the page below the threshold, because we still spent resources
2458 * on it.
2459 */
2460 if (*num_recomp_pages)
2461 *num_recomp_pages -= 1;
2462
2463 if (ret) {
2464 zcomp_stream_put(zstrm);
2465 return ret;
2466 }
2467
2468 class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
2469 class_index_new = zs_lookup_class_index(zram->mem_pool, comp_len_new);
2470
2471 if (class_index_new >= class_index_old ||
2472 (threshold && comp_len_new >= threshold)) {
2473 zcomp_stream_put(zstrm);
2474
2475 /*
2476 * Secondary algorithms failed to re-compress the page
2477 * in a way that would save memory.
2478 *
2479 * Mark the object incompressible if the max-priority (the
2480 * last configured one) algorithm couldn't re-compress it.
2481 */
2482 if (highest_priority_algorithm(zram, prio))
2483 set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
2484 return 0;
2485 }
2486
2487 /*
2488 * We are holding the per-CPU stream mutex and the entry lock, so we
2489 * should avoid direct reclaim. An allocation error is not fatal since
2490 * we still have the old object in the mem_pool.
2491 *
2492 * XXX: technically, the node we really want here is the node that
2493 * holds the original compressed data. But that would require us to
2494 * modify zsmalloc API to return this information. For now, we will
2495 * make do with the node of the page allocated for recompression.
2496 */
2497 handle_new = zs_malloc(zram->mem_pool, comp_len_new,
2498 GFP_NOIO | __GFP_NOWARN |
2499 __GFP_HIGHMEM | __GFP_MOVABLE,
2500 page_to_nid(page));
2501 if (IS_ERR_VALUE(handle_new)) {
2502 zcomp_stream_put(zstrm);
2503 return PTR_ERR((void *)handle_new);
2504 }
2505
2506 zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
2507 zcomp_stream_put(zstrm);
2508
2509 slot_free(zram, index);
2510 set_slot_handle(zram, index, handle_new);
2511 set_slot_size(zram, index, comp_len_new);
2512 set_slot_comp_priority(zram, index, prio);
2513
2514 atomic64_add(comp_len_new, &zram->stats.compr_data_size);
2515 atomic64_inc(&zram->stats.pages_stored);
2516
2517 return 0;
2518 }
2519
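/*
 * Example usage (illustrative; the device index, algorithm name and
 * parameter values are assumptions):
 *
 *   echo "type=huge_idle max_pages=42" > /sys/block/zram0/recompress
 *   echo "type=idle threshold=3000 algo=zstd" > /sys/block/zram0/recompress
 */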
2520 static ssize_t recompress_store(struct device *dev,
2521 struct device_attribute *attr,
2522 const char *buf, size_t len)
2523 {
2524 struct zram *zram = dev_to_zram(dev);
2525 char *args, *param, *val, *algo = NULL;
2526 u64 num_recomp_pages = ULLONG_MAX;
2527 struct zram_pp_ctl *ctl = NULL;
2528 s32 prio = ZRAM_SECONDARY_COMP;
2529 u32 mode = 0, threshold = 0;
2530 struct zram_pp_slot *pps;
2531 struct page *page = NULL;
2532 bool prio_param = false;
2533 ssize_t ret;
2534
2535 args = skip_spaces(buf);
2536 while (*args) {
2537 args = next_arg(args, &param, &val);
2538
2539 if (!val || !*val)
2540 return -EINVAL;
2541
2542 if (!strcmp(param, "type")) {
2543 if (!strcmp(val, "idle"))
2544 mode = RECOMPRESS_IDLE;
2545 if (!strcmp(val, "huge"))
2546 mode = RECOMPRESS_HUGE;
2547 if (!strcmp(val, "huge_idle"))
2548 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
2549 if (!mode)
2550 return -EINVAL;
2551 continue;
2552 }
2553
2554 if (!strcmp(param, "max_pages")) {
2555 /*
2556 * Limit the number of entries (pages) we attempt to
2557 * recompress.
2558 */
2559 ret = kstrtoull(val, 10, &num_recomp_pages);
2560 if (ret)
2561 return ret;
2562 continue;
2563 }
2564
2565 if (!strcmp(param, "threshold")) {
2566 /*
2567 * We will only re-compress objects that are equal to or
2568 * greater in size than the watermark.
2569 */
2570 ret = kstrtouint(val, 10, &threshold);
2571 if (ret)
2572 return ret;
2573 continue;
2574 }
2575
2576 if (!strcmp(param, "algo")) {
2577 algo = val;
2578 continue;
2579 }
2580
2581 if (!strcmp(param, "priority")) {
2582 prio_param = true;
2583 ret = kstrtoint(val, 10, &prio);
2584 if (ret)
2585 return ret;
2586 continue;
2587 }
2588 }
2589
2590 if (threshold >= huge_class_size)
2591 return -EINVAL;
2592
2593 guard(rwsem_write)(&zram->dev_lock);
2594 if (!init_done(zram))
2595 return -EINVAL;
2596
2597 if (prio_param) {
2598 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
2599 return -EINVAL;
2600 }
2601
2602 if (algo && prio_param) {
2603 ret = validate_algo_priority(zram, algo, prio);
2604 if (ret)
2605 return ret;
2606 }
2607
2608 if (algo && !prio_param) {
2609 prio = lookup_algo_priority(zram, algo, ZRAM_SECONDARY_COMP);
2610 if (prio < 0)
2611 return -EINVAL;
2612 }
2613
2614 if (!zram->comps[prio])
2615 return -EINVAL;
2616
2617 page = alloc_page(GFP_KERNEL);
2618 if (!page) {
2619 ret = -ENOMEM;
2620 goto out;
2621 }
2622
2623 ctl = init_pp_ctl();
2624 if (!ctl) {
2625 ret = -ENOMEM;
2626 goto out;
2627 }
2628
2629 scan_slots_for_recompress(zram, mode, prio, ctl);
2630
2631 ret = len;
2632 while ((pps = select_pp_slot(ctl))) {
2633 int err = 0;
2634
2635 if (!num_recomp_pages)
2636 break;
2637
2638 slot_lock(zram, pps->index);
2639 if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT))
2640 goto next;
2641
2642 err = recompress_slot(zram, pps->index, page,
2643 &num_recomp_pages, threshold, prio);
2644 next:
2645 slot_unlock(zram, pps->index);
2646 release_pp_slot(zram, pps);
2647
2648 if (err) {
2649 ret = err;
2650 break;
2651 }
2652
2653 cond_resched();
2654 }
2655
2656 out:
2657 if (page)
2658 __free_page(page);
2659 release_pp_ctl(zram, ctl);
2660 return ret;
2661 }
2662 #endif
2663
2664 static void zram_bio_discard(struct zram *zram, struct bio *bio)
2665 {
2666 size_t n = bio->bi_iter.bi_size;
2667 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2668 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2669 SECTOR_SHIFT;
2670
2671 /*
2672 * zram manages data in physical block size units. Because the logical
2673 * block size isn't identical to the physical block size on some
2674 * architectures, we could get a discard request pointing to a specific
2675 * offset within a certain physical block. Although we could handle such
2676 * a request by reading that physical block, decompressing, partially
2677 * zeroing, re-compressing and then re-storing it, this isn't reasonable
2678 * because our intent with a discard request is to save memory. So
2679 * skipping this logical block is appropriate here.
2680 */
2681 if (offset) {
2682 if (n <= (PAGE_SIZE - offset))
2683 goto end_bio;
2684
2685 n -= (PAGE_SIZE - offset);
2686 index++;
2687 }
2688
2689 while (n >= PAGE_SIZE) {
2690 slot_lock(zram, index);
2691 slot_free(zram, index);
2692 slot_unlock(zram, index);
2693 atomic64_inc(&zram->stats.notify_free);
2694 index++;
2695 n -= PAGE_SIZE;
2696 }
2697
2698 end_bio:
2699 bio_endio(bio);
2700 }
2701
2702 static void zram_bio_read(struct zram *zram, struct bio *bio)
2703 {
2704 unsigned long start_time = bio_start_io_acct(bio);
2705 struct bvec_iter iter = bio->bi_iter;
2706
2707 do {
2708 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2709 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2710 SECTOR_SHIFT;
2711 struct bio_vec bv = bio_iter_iovec(bio, iter);
2712
2713 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2714
2715 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
2716 atomic64_inc(&zram->stats.failed_reads);
2717 bio->bi_status = BLK_STS_IOERR;
2718 break;
2719 }
2720 flush_dcache_page(bv.bv_page);
2721
2722 slot_lock(zram, index);
2723 mark_slot_accessed(zram, index);
2724 slot_unlock(zram, index);
2725
2726 bio_advance_iter_single(bio, &iter, bv.bv_len);
2727 } while (iter.bi_size);
2728
2729 bio_end_io_acct(bio, start_time);
2730 bio_endio(bio);
2731 }
2732
2733 static void zram_bio_write(struct zram *zram, struct bio *bio)
2734 {
2735 unsigned long start_time = bio_start_io_acct(bio);
2736 struct bvec_iter iter = bio->bi_iter;
2737
2738 do {
2739 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2740 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2741 SECTOR_SHIFT;
2742 struct bio_vec bv = bio_iter_iovec(bio, iter);
2743
2744 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2745
2746 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
2747 atomic64_inc(&zram->stats.failed_writes);
2748 bio->bi_status = BLK_STS_IOERR;
2749 break;
2750 }
2751
2752 slot_lock(zram, index);
2753 mark_slot_accessed(zram, index);
2754 slot_unlock(zram, index);
2755
2756 bio_advance_iter_single(bio, &iter, bv.bv_len);
2757 } while (iter.bi_size);
2758
2759 bio_end_io_acct(bio, start_time);
2760 bio_endio(bio);
2761 }
2762
2763 /*
2764 * Handler function for all zram I/O requests.
2765 */
2766 static void zram_submit_bio(struct bio *bio)
2767 {
2768 struct zram *zram = bio->bi_bdev->bd_disk->private_data;
2769
2770 switch (bio_op(bio)) {
2771 case REQ_OP_READ:
2772 zram_bio_read(zram, bio);
2773 break;
2774 case REQ_OP_WRITE:
2775 zram_bio_write(zram, bio);
2776 break;
2777 case REQ_OP_DISCARD:
2778 case REQ_OP_WRITE_ZEROES:
2779 zram_bio_discard(zram, bio);
2780 break;
2781 default:
2782 WARN_ON_ONCE(1);
2783 bio_endio(bio);
2784 }
2785 }
2786
2787 static void zram_slot_free_notify(struct block_device *bdev,
2788 unsigned long index)
2789 {
2790 struct zram *zram;
2791
2792 zram = bdev->bd_disk->private_data;
2793
2794 atomic64_inc(&zram->stats.notify_free);
2795 if (!slot_trylock(zram, index)) {
2796 atomic64_inc(&zram->stats.miss_free);
2797 return;
2798 }
2799
2800 slot_free(zram, index);
2801 slot_unlock(zram, index);
2802 }
2803
2804 static void zram_comp_params_reset(struct zram *zram)
2805 {
2806 u32 prio;
2807
2808 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2809 comp_params_reset(zram, prio);
2810 }
2811 }
2812
2813 static void zram_destroy_comps(struct zram *zram)
2814 {
2815 u32 prio;
2816
2817 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2818 struct zcomp *comp = zram->comps[prio];
2819
2820 zram->comps[prio] = NULL;
2821 if (!comp)
2822 continue;
2823 zcomp_destroy(comp);
2824 }
2825
2826 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++)
2827 zram->comp_algs[prio] = NULL;
2828
2829 zram_comp_params_reset(zram);
2830 }
2831
2832 static void zram_reset_device(struct zram *zram)
2833 {
2834 guard(rwsem_write)(&zram->dev_lock);
2835
2836 zram->limit_pages = 0;
2837
2838 set_capacity_and_notify(zram->disk, 0);
2839 part_stat_set_all(zram->disk->part0, 0);
2840
2841 /* All in-flight I/O has completed, so it is safe to free the metadata */
2842 zram_meta_free(zram, zram->disksize);
2843 zram->disksize = 0;
2844 zram_destroy_comps(zram);
2845 memset(&zram->stats, 0, sizeof(zram->stats));
2846 reset_bdev(zram);
2847
2848 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2849 }
2850
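/*
 * Typical setup sequence (illustrative; the device index, algorithm and
 * size are assumptions, and the compression algorithm must be selected
 * before the disksize is set):
 *
 *   echo zstd > /sys/block/zram0/comp_algorithm
 *   echo 1G > /sys/block/zram0/disksize
 *   mkswap /dev/zram0 && swapon /dev/zram0
 */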
2851 static ssize_t disksize_store(struct device *dev, struct device_attribute *attr,
2852 const char *buf, size_t len)
2853 {
2854 u64 disksize;
2855 struct zcomp *comp;
2856 struct zram *zram = dev_to_zram(dev);
2857 int err;
2858 u32 prio;
2859
2860 disksize = memparse(buf, NULL);
2861 if (!disksize)
2862 return -EINVAL;
2863
2864 guard(rwsem_write)(&zram->dev_lock);
2865 if (init_done(zram)) {
2866 pr_info("Cannot change disksize for initialized device\n");
2867 return -EBUSY;
2868 }
2869
2870 disksize = PAGE_ALIGN(disksize);
2871 if (!zram_meta_alloc(zram, disksize))
2872 return -ENOMEM;
2873
2874 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2875 if (!zram->comp_algs[prio])
2876 continue;
2877
2878 comp = zcomp_create(zram->comp_algs[prio],
2879 &zram->params[prio]);
2880 if (IS_ERR(comp)) {
2881 pr_err("Cannot initialise %s compressing backend\n",
2882 zram->comp_algs[prio]);
2883 err = PTR_ERR(comp);
2884 goto out_free_comps;
2885 }
2886
2887 zram->comps[prio] = comp;
2888 }
2889 zram->disksize = disksize;
2890 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
2891
2892 return len;
2893
2894 out_free_comps:
2895 zram_destroy_comps(zram);
2896 zram_meta_free(zram, disksize);
2897 return err;
2898 }
2899
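/*
 * Example (illustrative): stop all users of the device first, then
 * write a non-zero value to reset:
 *
 *   swapoff /dev/zram0
 *   echo 1 > /sys/block/zram0/reset
 */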
2900 static ssize_t reset_store(struct device *dev,
2901 struct device_attribute *attr, const char *buf, size_t len)
2902 {
2903 int ret;
2904 unsigned short do_reset;
2905 struct zram *zram;
2906 struct gendisk *disk;
2907
2908 ret = kstrtou16(buf, 10, &do_reset);
2909 if (ret)
2910 return ret;
2911
2912 if (!do_reset)
2913 return -EINVAL;
2914
2915 zram = dev_to_zram(dev);
2916 disk = zram->disk;
2917
2918 mutex_lock(&disk->open_mutex);
2919 /* Do not reset an active device or claimed device */
2920 if (disk_openers(disk) || zram->claim) {
2921 mutex_unlock(&disk->open_mutex);
2922 return -EBUSY;
2923 }
2924
2925 /* From now on, no one can open /dev/zram[0-9] */
2926 zram->claim = true;
2927 mutex_unlock(&disk->open_mutex);
2928
2929 /* Make sure all pending I/O is finished */
2930 sync_blockdev(disk->part0);
2931 zram_reset_device(zram);
2932
2933 mutex_lock(&disk->open_mutex);
2934 zram->claim = false;
2935 mutex_unlock(&disk->open_mutex);
2936
2937 return len;
2938 }
2939
2940 static int zram_open(struct gendisk *disk, blk_mode_t mode)
2941 {
2942 struct zram *zram = disk->private_data;
2943
2944 WARN_ON(!mutex_is_locked(&disk->open_mutex));
2945
2946 /* zram was claimed for reset, so the open request fails */
2947 if (zram->claim)
2948 return -EBUSY;
2949 return 0;
2950 }
2951
2952 static const struct block_device_operations zram_devops = {
2953 .open = zram_open,
2954 .submit_bio = zram_submit_bio,
2955 .swap_slot_free_notify = zram_slot_free_notify,
2956 .owner = THIS_MODULE
2957 };
2958
2959 static DEVICE_ATTR_RO(io_stat);
2960 static DEVICE_ATTR_RO(mm_stat);
2961 static DEVICE_ATTR_RO(debug_stat);
2962 static DEVICE_ATTR_WO(compact);
2963 static DEVICE_ATTR_RW(disksize);
2964 static DEVICE_ATTR_RO(initstate);
2965 static DEVICE_ATTR_WO(reset);
2966 static DEVICE_ATTR_WO(mem_limit);
2967 static DEVICE_ATTR_WO(mem_used_max);
2968 static DEVICE_ATTR_WO(idle);
2969 static DEVICE_ATTR_RW(comp_algorithm);
2970 #ifdef CONFIG_ZRAM_WRITEBACK
2971 static DEVICE_ATTR_RO(bd_stat);
2972 static DEVICE_ATTR_RW(backing_dev);
2973 static DEVICE_ATTR_WO(writeback);
2974 static DEVICE_ATTR_RW(writeback_limit);
2975 static DEVICE_ATTR_RW(writeback_limit_enable);
2976 static DEVICE_ATTR_RW(writeback_batch_size);
2977 static DEVICE_ATTR_RW(compressed_writeback);
2978 #endif
2979 #ifdef CONFIG_ZRAM_MULTI_COMP
2980 static DEVICE_ATTR_RW(recomp_algorithm);
2981 static DEVICE_ATTR_WO(recompress);
2982 #endif
2983 static DEVICE_ATTR_WO(algorithm_params);
2984
2985 static struct attribute *zram_disk_attrs[] = {
2986 &dev_attr_disksize.attr,
2987 &dev_attr_initstate.attr,
2988 &dev_attr_reset.attr,
2989 &dev_attr_compact.attr,
2990 &dev_attr_mem_limit.attr,
2991 &dev_attr_mem_used_max.attr,
2992 &dev_attr_idle.attr,
2993 &dev_attr_comp_algorithm.attr,
2994 #ifdef CONFIG_ZRAM_WRITEBACK
2995 &dev_attr_bd_stat.attr,
2996 &dev_attr_backing_dev.attr,
2997 &dev_attr_writeback.attr,
2998 &dev_attr_writeback_limit.attr,
2999 &dev_attr_writeback_limit_enable.attr,
3000 &dev_attr_writeback_batch_size.attr,
3001 &dev_attr_compressed_writeback.attr,
3002 #endif
3003 &dev_attr_io_stat.attr,
3004 &dev_attr_mm_stat.attr,
3005 &dev_attr_debug_stat.attr,
3006 #ifdef CONFIG_ZRAM_MULTI_COMP
3007 &dev_attr_recomp_algorithm.attr,
3008 &dev_attr_recompress.attr,
3009 #endif
3010 &dev_attr_algorithm_params.attr,
3011 NULL,
3012 };
3013
3014 ATTRIBUTE_GROUPS(zram_disk);
3015
3016 /*
3017 * Allocate and initialize a new zram device. The function returns
3018 * a device_id >= 0 upon success, and a negative value otherwise.
3019 */
3020 static int zram_add(void)
3021 {
3022 struct queue_limits lim = {
3023 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
3024 /*
3025 * To ensure that we always get PAGE_SIZE-aligned and
3026 * n*PAGE_SIZE-sized I/O requests.
3027 */
3028 .physical_block_size = PAGE_SIZE,
3029 .io_min = PAGE_SIZE,
3030 .io_opt = PAGE_SIZE,
3031 .max_hw_discard_sectors = UINT_MAX,
3032 /*
3033 * zram_bio_discard() will clear all logical blocks if logical
3034 * block size is identical to the physical block size (PAGE_SIZE).
3035 * But if it is different, we will skip discarding some parts of
3036 * logical blocks in the part of the request range which isn't
3037 * aligned to physical block size. So we can't ensure that all
3038 * discarded logical blocks are zeroed.
3039 */
3040 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
3041 .max_write_zeroes_sectors = UINT_MAX,
3042 #endif
3043 .features = BLK_FEAT_STABLE_WRITES |
3044 BLK_FEAT_SYNCHRONOUS,
3045 };
3046 struct zram *zram;
3047 int ret, device_id;
3048
3049 zram = kzalloc_obj(struct zram);
3050 if (!zram)
3051 return -ENOMEM;
3052
3053 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
3054 if (ret < 0)
3055 goto out_free_dev;
3056 device_id = ret;
3057
3058 init_rwsem(&zram->dev_lock);
3059 #ifdef CONFIG_ZRAM_WRITEBACK
3060 zram->wb_batch_size = 32;
3061 zram->compressed_wb = false;
3062 #endif
3063
3064 /* gendisk structure */
3065 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
3066 if (IS_ERR(zram->disk)) {
3067 pr_err("Error allocating disk structure for device %d\n",
3068 device_id);
3069 ret = PTR_ERR(zram->disk);
3070 goto out_free_idr;
3071 }
3072
3073 zram->disk->major = zram_major;
3074 zram->disk->first_minor = device_id;
3075 zram->disk->minors = 1;
3076 zram->disk->flags |= GENHD_FL_NO_PART;
3077 zram->disk->fops = &zram_devops;
3078 zram->disk->private_data = zram;
3079 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
3080 zram_comp_params_reset(zram);
3081 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
3082
3083 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
3084 set_capacity(zram->disk, 0);
3085 ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
3086 if (ret)
3087 goto out_cleanup_disk;
3088
3089 zram_debugfs_register(zram);
3090 pr_info("Added device: %s\n", zram->disk->disk_name);
3091 return device_id;
3092
3093 out_cleanup_disk:
3094 put_disk(zram->disk);
3095 out_free_idr:
3096 idr_remove(&zram_index_idr, device_id);
3097 out_free_dev:
3098 kfree(zram);
3099 return ret;
3100 }
3101
3102 static int zram_remove(struct zram *zram)
3103 {
3104 bool claimed;
3105
3106 mutex_lock(&zram->disk->open_mutex);
3107 if (disk_openers(zram->disk)) {
3108 mutex_unlock(&zram->disk->open_mutex);
3109 return -EBUSY;
3110 }
3111
3112 claimed = zram->claim;
3113 if (!claimed)
3114 zram->claim = true;
3115 mutex_unlock(&zram->disk->open_mutex);
3116
3117 zram_debugfs_unregister(zram);
3118
3119 if (claimed) {
3120 /*
3121 * If we were claimed by reset_store(), del_gendisk() will
3122 * wait until reset_store() is done, so there is nothing to do here.
3123 */
3124 ;
3125 } else {
3126 /* Make sure all pending I/O is finished */
3127 sync_blockdev(zram->disk->part0);
3128 zram_reset_device(zram);
3129 }
3130
3131 pr_info("Removed device: %s\n", zram->disk->disk_name);
3132
3133 del_gendisk(zram->disk);
3134
3135 /* del_gendisk drains pending reset_store */
3136 WARN_ON_ONCE(claimed && zram->claim);
3137
3138 /*
3139 * disksize_store() may be called in between zram_reset_device()
3140 * and del_gendisk(), so run the last reset to avoid leaking
3141 * anything allocated with disksize_store()
3142 */
3143 zram_reset_device(zram);
3144
3145 put_disk(zram->disk);
3146 kfree(zram);
3147 return 0;
3148 }
3149
3150 /* zram-control sysfs attributes */
3151
3152 /*
3153 * NOTE: the hot_add attribute is not the usual read-only sysfs attribute, in
3154 * the sense that reading from this file does alter the state of your system:
3155 * it creates a new un-initialized zram device and returns this device's
3156 * device_id (or an error code if it fails to create a new device).
3157 */
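/*
 * Example (illustrative):
 *
 *   cat /sys/class/zram-control/hot_add      (prints the new device_id)
 *   echo 1 > /sys/class/zram-control/hot_remove
 */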
3158 static ssize_t hot_add_show(const struct class *class,
3159 const struct class_attribute *attr,
3160 char *buf)
3161 {
3162 int ret;
3163
3164 mutex_lock(&zram_index_mutex);
3165 ret = zram_add();
3166 mutex_unlock(&zram_index_mutex);
3167
3168 if (ret < 0)
3169 return ret;
3170 return sysfs_emit(buf, "%d\n", ret);
3171 }
3172 /* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
3173 static struct class_attribute class_attr_hot_add =
3174 __ATTR(hot_add, 0400, hot_add_show, NULL);
3175
3176 static ssize_t hot_remove_store(const struct class *class,
3177 const struct class_attribute *attr,
3178 const char *buf,
3179 size_t count)
3180 {
3181 struct zram *zram;
3182 int ret, dev_id;
3183
3184 /* dev_id is gendisk->first_minor, which is `int' */
3185 ret = kstrtoint(buf, 10, &dev_id);
3186 if (ret)
3187 return ret;
3188 if (dev_id < 0)
3189 return -EINVAL;
3190
3191 mutex_lock(&zram_index_mutex);
3192
3193 zram = idr_find(&zram_index_idr, dev_id);
3194 if (zram) {
3195 ret = zram_remove(zram);
3196 if (!ret)
3197 idr_remove(&zram_index_idr, dev_id);
3198 } else {
3199 ret = -ENODEV;
3200 }
3201
3202 mutex_unlock(&zram_index_mutex);
3203 return ret ? ret : count;
3204 }
3205 static CLASS_ATTR_WO(hot_remove);
3206
3207 static struct attribute *zram_control_class_attrs[] = {
3208 &class_attr_hot_add.attr,
3209 &class_attr_hot_remove.attr,
3210 NULL,
3211 };
3212 ATTRIBUTE_GROUPS(zram_control_class);
3213
3214 static struct class zram_control_class = {
3215 .name = "zram-control",
3216 .class_groups = zram_control_class_groups,
3217 };
3218
3219 static int zram_remove_cb(int id, void *ptr, void *data)
3220 {
3221 WARN_ON_ONCE(zram_remove(ptr));
3222 return 0;
3223 }
3224
3225 static void destroy_devices(void)
3226 {
3227 class_unregister(&zram_control_class);
3228 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
3229 zram_debugfs_destroy();
3230 idr_destroy(&zram_index_idr);
3231 unregister_blkdev(zram_major, "zram");
3232 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3233 }
3234
3235 static int __init zram_init(void)
3236 {
3237 struct zram_table_entry zram_te;
3238 int ret;
3239
3240 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8);
3241
3242 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
3243 zcomp_cpu_up_prepare, zcomp_cpu_dead);
3244 if (ret < 0)
3245 return ret;
3246
3247 ret = class_register(&zram_control_class);
3248 if (ret) {
3249 pr_err("Unable to register zram-control class\n");
3250 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3251 return ret;
3252 }
3253
3254 zram_debugfs_create();
3255 zram_major = register_blkdev(0, "zram");
3256 if (zram_major <= 0) {
3257 pr_err("Unable to get major number\n");
3258 class_unregister(&zram_control_class);
3259 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3260 return -EBUSY;
3261 }
3262
3263 while (num_devices != 0) {
3264 mutex_lock(&zram_index_mutex);
3265 ret = zram_add();
3266 mutex_unlock(&zram_index_mutex);
3267 if (ret < 0)
3268 goto out_error;
3269 num_devices--;
3270 }
3271
3272 return 0;
3273
3274 out_error:
3275 destroy_devices();
3276 return ret;
3277 }
3278
3279 static void __exit zram_exit(void)
3280 {
3281 destroy_devices();
3282 }
3283
3284 module_init(zram_init);
3285 module_exit(zram_exit);
3286
3287 module_param(num_devices, uint, 0);
3288 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
3289
3290 MODULE_LICENSE("Dual BSD/GPL");
3291 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
3292 MODULE_DESCRIPTION("Compressed RAM Block Device");
3293