1 /*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
15 #define pr_fmt(fmt) "zram: " fmt
16
17 #include <linux/module.h>
18 #include <linux/kernel.h>
19 #include <linux/bio.h>
20 #include <linux/bitops.h>
21 #include <linux/blkdev.h>
22 #include <linux/buffer_head.h>
23 #include <linux/device.h>
24 #include <linux/highmem.h>
25 #include <linux/slab.h>
26 #include <linux/backing-dev.h>
27 #include <linux/string.h>
28 #include <linux/vmalloc.h>
29 #include <linux/err.h>
30 #include <linux/idr.h>
31 #include <linux/sysfs.h>
32 #include <linux/debugfs.h>
33 #include <linux/cpuhotplug.h>
34 #include <linux/part_stat.h>
35 #include <linux/kernel_read_file.h>
36
37 #include "zram_drv.h"
38
39 static DEFINE_IDR(zram_index_idr);
40 /* idr index must be protected */
41 static DEFINE_MUTEX(zram_index_mutex);
42
43 static int zram_major;
44 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
45
46 #define ZRAM_MAX_ALGO_NAME_SZ 128
47
48 /* Module params (documentation at end) */
49 static unsigned int num_devices = 1;
50 /*
51 * Pages that compress to sizes equals or greater than this are stored
52 * uncompressed in memory.
53 */
54 static size_t huge_class_size;
55
56 static const struct block_device_operations zram_devops;
57
58 static void slot_free(struct zram *zram, u32 index);
59 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)
60
/*
 * Initialize the lockdep map for a slot's bit-spin style lock. All slots
 * share one lock class (a single static key), so lockdep treats every
 * table entry lock uniformly.
 */
static void slot_lock_init(struct zram *zram, u32 index)
{
	static struct lock_class_key __key;

	lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
			 &__key, 0);
}
68
69 /*
70 * entry locking rules:
71 *
72 * 1) Lock is exclusive
73 *
74 * 2) lock() function can sleep waiting for the lock
75 *
76 * 3) Lock owner can sleep
77 *
78 * 4) Use TRY lock variant when in atomic context
79 * - must check return value and handle locking failers
80 */
static __must_check bool slot_trylock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	/* Non-blocking acquire: only annotate for lockdep on success */
	if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
		mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
		lock_acquired(slot_dep_map(zram, index), _RET_IP_);
		return true;
	}

	return false;
}
93
/* Sleeping lock of a table entry (see locking rules above) */
static void slot_lock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	/* Tell lockdep we are about to (possibly) block on the bit-lock */
	mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
	wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
	lock_acquired(slot_dep_map(zram, index), _RET_IP_);
}
102
static void slot_unlock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	mutex_release(slot_dep_map(zram, index), _RET_IP_);
	/* Clear the lock bit and wake any slot_lock() waiters */
	clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
}
110
init_done(struct zram * zram)111 static inline bool init_done(struct zram *zram)
112 {
113 return zram->disksize;
114 }
115
dev_to_zram(struct device * dev)116 static inline struct zram *dev_to_zram(struct device *dev)
117 {
118 return (struct zram *)dev_to_disk(dev)->private_data;
119 }
120
/* Return the handle stored in the slot (zspool handle or bdev block). */
static unsigned long get_slot_handle(struct zram *zram, u32 index)
{
	unsigned long handle = zram->table[index].handle;

	return handle;
}
125
/* Record a new handle (zspool handle or bdev block) in the slot. */
static void set_slot_handle(struct zram *zram, u32 index, unsigned long entry)
{
	zram->table[index].handle = entry;
}
130
test_slot_flag(struct zram * zram,u32 index,enum zram_pageflags flag)131 static bool test_slot_flag(struct zram *zram, u32 index,
132 enum zram_pageflags flag)
133 {
134 return zram->table[index].attr.flags & BIT(flag);
135 }
136
/* Set @flag on the given slot. */
static void set_slot_flag(struct zram *zram, u32 index,
			  enum zram_pageflags flag)
{
	unsigned long mask = BIT(flag);

	zram->table[index].attr.flags |= mask;
}
142
/* Clear @flag on the given slot. */
static void clear_slot_flag(struct zram *zram, u32 index,
			    enum zram_pageflags flag)
{
	unsigned long mask = BIT(flag);

	zram->table[index].attr.flags &= ~mask;
}
148
get_slot_size(struct zram * zram,u32 index)149 static size_t get_slot_size(struct zram *zram, u32 index)
150 {
151 return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
152 }
153
/*
 * Store the compressed object size in the low bits of attr.flags while
 * preserving the flag bits packed above ZRAM_FLAG_SHIFT.
 */
static void set_slot_size(struct zram *zram, u32 index, size_t size)
{
	unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size;
}
160
slot_allocated(struct zram * zram,u32 index)161 static inline bool slot_allocated(struct zram *zram, u32 index)
162 {
163 return get_slot_size(zram, index) ||
164 test_slot_flag(zram, index, ZRAM_SAME) ||
165 test_slot_flag(zram, index, ZRAM_WB);
166 }
167
/* Record which compression stream (priority) compressed this slot. */
static inline void set_slot_comp_priority(struct zram *zram, u32 index,
					  u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Clear previous priority value first, in case if we recompress
	 * further an already recompressed page
	 */
	zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
					   ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}
180
/* Return the compression-stream priority recorded for the slot. */
static inline u32 get_slot_comp_priority(struct zram *zram, u32 index)
{
	unsigned long flags = zram->table[index].attr.flags;

	return (flags >> ZRAM_COMP_PRIORITY_BIT1) & ZRAM_COMP_PRIORITY_MASK;
}
187
/*
 * An access clears IDLE/PP state: the slot is no longer a candidate for
 * the current post-processing (writeback/recompression) pass.
 */
static void mark_slot_accessed(struct zram *zram, u32 index)
{
	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	/* Stamp last-access time (boottime seconds, truncated to u32) */
	zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds();
#endif
}
196
/*
 * Racelessly raise stats.max_used_pages to @pages if it is a new maximum.
 * On cmpxchg failure cur_max is reloaded, so we bail out as soon as the
 * recorded maximum is already >= @pages.
 */
static inline void update_used_max(struct zram *zram, const unsigned long pages)
{
	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		if (cur_max >= pages)
			return;
	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
					  &cur_max, pages));
}
207
/*
 * Check the configured memory limit (if any) against the zspool footprint.
 * Also opportunistically updates the max_used_pages watermark.
 */
static bool zram_can_store_page(struct zram *zram)
{
	unsigned long alloced_pages;

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	/* limit_pages == 0 means "no limit" */
	return !zram->limit_pages || alloced_pages <= zram->limit_pages;
}
217
#if PAGE_SIZE != 4096
/* IO that does not cover a whole page needs read-modify-write handling */
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#define ZRAM_PARTIAL_IO 1
#else
/* With 4K pages every bvec is exactly page sized, partial IO cannot occur */
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif
230
231 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
/* A slot selected for post-processing (writeback or recompression) */
struct zram_pp_slot {
	unsigned long index;
	struct list_head entry;
};

/*
 * A post-processing bucket is, essentially, a size class, this defines
 * the range (in bytes) of pp-slots sizes in particular bucket.
 */
#define PP_BUCKET_SIZE_RANGE 64
#define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)

/* Per post-processing-pass state: slots grouped into size-class buckets */
struct zram_pp_ctl {
	struct list_head pp_buckets[NUM_PP_BUCKETS];
};
247
/*
 * Allocate a post-processing control and initialize its buckets.
 * Returns NULL on allocation failure.
 */
static struct zram_pp_ctl *init_pp_ctl(void)
{
	struct zram_pp_ctl *ctl;
	u32 idx;

	ctl = kmalloc_obj(*ctl);
	if (!ctl)
		return NULL;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
		INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
	return ctl;
}
261
/* Drop a pp-slot from its bucket, clear its PP flag and free it */
static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
{
	list_del_init(&pps->entry);

	/* ZRAM_PP_SLOT is only manipulated under the slot lock */
	slot_lock(zram, pps->index);
	clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
	slot_unlock(zram, pps->index);

	kfree(pps);
}
272
/* Release all remaining pp-slots in every bucket, then the ctl itself */
static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
{
	u32 idx;

	if (!ctl)
		return;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
		while (!list_empty(&ctl->pp_buckets[idx])) {
			struct zram_pp_slot *pps;

			pps = list_first_entry(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
			release_pp_slot(zram, pps);
		}
	}

	kfree(ctl);
}
293
/*
 * Add slot @index to the pp bucket matching its compressed size and mark
 * it ZRAM_PP_SLOT. Caller is expected to hold the slot lock. Returns
 * false if the pp-slot descriptor cannot be allocated.
 */
static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
			  u32 index)
{
	struct zram_pp_slot *pps;
	u32 bid;

	pps = kmalloc_obj(*pps, GFP_NOIO | __GFP_NOWARN);
	if (!pps)
		return false;

	INIT_LIST_HEAD(&pps->entry);
	pps->index = index;

	/* Bucket id is derived from the slot's compressed object size */
	bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
	list_add(&pps->entry, &ctl->pp_buckets[bid]);

	set_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
	return true;
}
313
select_pp_slot(struct zram_pp_ctl * ctl)314 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
315 {
316 struct zram_pp_slot *pps = NULL;
317 s32 idx = NUM_PP_BUCKETS - 1;
318
319 /* The higher the bucket id the more optimal slot post-processing is */
320 while (idx >= 0) {
321 pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
322 struct zram_pp_slot,
323 entry);
324 if (pps)
325 break;
326
327 idx--;
328 }
329 return pps;
330 }
331 #endif
332
/* Fill @len bytes at @ptr with the repeated word @value (same-filled page) */
static inline void zram_fill_page(void *ptr, unsigned long len,
				  unsigned long value)
{
	/* memset_l works in word units, len must be word aligned */
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}
339
page_same_filled(void * ptr,unsigned long * element)340 static bool page_same_filled(void *ptr, unsigned long *element)
341 {
342 unsigned long *page;
343 unsigned long val;
344 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
345
346 page = (unsigned long *)ptr;
347 val = page[0];
348
349 if (val != page[last_pos])
350 return false;
351
352 for (pos = 1; pos < last_pos; pos++) {
353 if (val != page[pos])
354 return false;
355 }
356
357 *element = val;
358
359 return true;
360 }
361
initstate_show(struct device * dev,struct device_attribute * attr,char * buf)362 static ssize_t initstate_show(struct device *dev, struct device_attribute *attr,
363 char *buf)
364 {
365 u32 val;
366 struct zram *zram = dev_to_zram(dev);
367
368 guard(rwsem_read)(&zram->dev_lock);
369 val = init_done(zram);
370
371 return sysfs_emit(buf, "%u\n", val);
372 }
373
disksize_show(struct device * dev,struct device_attribute * attr,char * buf)374 static ssize_t disksize_show(struct device *dev,
375 struct device_attribute *attr, char *buf)
376 {
377 struct zram *zram = dev_to_zram(dev);
378
379 return sysfs_emit(buf, "%llu\n", zram->disksize);
380 }
381
/*
 * Set the memory-usage limit (in bytes, memparse suffixes accepted).
 * The limit is stored in pages; 0 disables limiting.
 */
static ssize_t mem_limit_store(struct device *dev,
			       struct device_attribute *attr, const char *buf,
			       size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;

	return len;
}
399
/*
 * Writing "0" resets the max_used_pages watermark to the current zspool
 * footprint. Any other value is rejected.
 */
static ssize_t mem_used_max_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	guard(rwsem_read)(&zram->dev_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}

	return len;
}
420
421 /*
422 * Mark all pages which are older than or equal to cutoff as IDLE.
423 * Callers should hold the zram init lock in read mode
424 */
static void mark_idle(struct zram *zram, ktime_t cutoff)
{
	/* Without ACTIME tracking every eligible slot is considered idle */
	int is_idle = 1;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	/* NOTE(review): int index limits devices to ~2^31 pages — confirm */
	int index;

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
		 * post-processing (recompress, writeback) happens to the
		 * ZRAM_SAME slot.
		 *
		 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
		 */
		slot_lock(zram, index);
		if (!slot_allocated(zram, index) ||
		    test_slot_flag(zram, index, ZRAM_WB) ||
		    test_slot_flag(zram, index, ZRAM_SAME)) {
			slot_unlock(zram, index);
			continue;
		}

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
		/* cutoff == 0 means "mark everything idle" */
		is_idle = !cutoff ||
			ktime_after(cutoff, zram->table[index].attr.ac_time);
#endif
		if (is_idle)
			set_slot_flag(zram, index, ZRAM_IDLE);
		else
			clear_slot_flag(zram, index, ZRAM_IDLE);
		slot_unlock(zram, index);
	}
}
458
/*
 * Mark slots idle. Accepts "all" (mark everything) or, with ACTIME
 * tracking enabled, an age in seconds: slots not accessed within that
 * many seconds are marked idle.
 */
static ssize_t idle_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	ktime_t cutoff = 0;

	if (!sysfs_streq(buf, "all")) {
		/*
		 * If it did not parse as 'all' try to treat it as an integer
		 * when we have memory tracking enabled.
		 */
		u32 age_sec;

		if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) &&
		    !kstrtouint(buf, 0, &age_sec))
			cutoff = ktime_sub((u32)ktime_get_boottime_seconds(),
					   age_sec);
		else
			return -EINVAL;
	}

	guard(rwsem_read)(&zram->dev_lock);
	if (!init_done(zram))
		return -EINVAL;

	/*
	 * A cutoff of 0 marks everything as idle, this is the
	 * "all" behavior.
	 */
	mark_idle(zram, cutoff);
	return len;
}
491
492 #ifdef CONFIG_ZRAM_WRITEBACK
493 #define INVALID_BDEV_BLOCK (~0UL)
494
495 static int read_from_zspool_raw(struct zram *zram, struct page *page,
496 u32 index);
497 static int read_from_zspool(struct zram *zram, struct page *page, u32 index);
498
/* Per writeback-pass state: request pools and completion bookkeeping */
struct zram_wb_ctl {
	/* idle list is accessed only by the writeback task, no concurrency */
	struct list_head idle_reqs;
	/* done list is accessed concurrently, protect by done_lock */
	struct list_head done_reqs;
	wait_queue_head_t done_wait;
	spinlock_t done_lock;
	atomic_t num_inflight;
};

/* One in-flight writeback WRITE to the backing device */
struct zram_wb_req {
	unsigned long blk_idx;
	struct page *page;
	struct zram_pp_slot *pps;
	struct bio_vec bio_vec;
	struct bio bio;

	struct list_head entry;
};

/* One READ of a written-back slot from the backing device */
struct zram_rb_req {
	struct work_struct work;
	struct zram *zram;
	struct page *page;
	/* The read bio for backing device */
	struct bio *bio;
	unsigned long blk_idx;
	union {
		/* The original bio to complete (async read) */
		struct bio *parent;
		/* error status (sync read) */
		int error;
	};
	u32 index;
};
534
535 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
/* Show backing-device stats, normalized to 4K units via FOUR_K() */
static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	ret = sysfs_emit(buf,
			 "%8llu %8llu %8llu\n",
			 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));

	return ret;
}
551
writeback_compressed_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)552 static ssize_t writeback_compressed_store(struct device *dev,
553 struct device_attribute *attr,
554 const char *buf, size_t len)
555 {
556 struct zram *zram = dev_to_zram(dev);
557 bool val;
558
559 if (kstrtobool(buf, &val))
560 return -EINVAL;
561
562 guard(rwsem_write)(&zram->dev_lock);
563 if (init_done(zram)) {
564 return -EBUSY;
565 }
566
567 zram->wb_compressed = val;
568
569 return len;
570 }
571
writeback_compressed_show(struct device * dev,struct device_attribute * attr,char * buf)572 static ssize_t writeback_compressed_show(struct device *dev,
573 struct device_attribute *attr,
574 char *buf)
575 {
576 bool val;
577 struct zram *zram = dev_to_zram(dev);
578
579 guard(rwsem_read)(&zram->dev_lock);
580 val = zram->wb_compressed;
581
582 return sysfs_emit(buf, "%d\n", val);
583 }
584
writeback_limit_enable_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)585 static ssize_t writeback_limit_enable_store(struct device *dev,
586 struct device_attribute *attr,
587 const char *buf, size_t len)
588 {
589 struct zram *zram = dev_to_zram(dev);
590 u64 val;
591
592 if (kstrtoull(buf, 10, &val))
593 return -EINVAL;
594
595 guard(rwsem_write)(&zram->dev_lock);
596 zram->wb_limit_enable = val;
597
598 return len;
599 }
600
writeback_limit_enable_show(struct device * dev,struct device_attribute * attr,char * buf)601 static ssize_t writeback_limit_enable_show(struct device *dev,
602 struct device_attribute *attr,
603 char *buf)
604 {
605 bool val;
606 struct zram *zram = dev_to_zram(dev);
607
608 guard(rwsem_read)(&zram->dev_lock);
609 val = zram->wb_limit_enable;
610
611 return sysfs_emit(buf, "%d\n", val);
612 }
613
/* Set the writeback budget, in 4K units (see bd_stat accounting). */
static ssize_t writeback_limit_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;

	if (kstrtoull(buf, 10, &val))
		return -EINVAL;

	/*
	 * When the page size is greater than 4KB, if bd_wb_limit is set to
	 * a value that is not page-size aligned, it will cause value
	 * wrapping. For example, when the page size is set to 16KB and
	 * bd_wb_limit is set to 3, a single writeback operation will
	 * cause bd_wb_limit to become -1. Even more terrifying is that
	 * bd_wb_limit is an unsigned number.
	 */
	val = rounddown(val, PAGE_SIZE / 4096);

	guard(rwsem_write)(&zram->dev_lock);
	zram->bd_wb_limit = val;

	return len;
}
639
writeback_limit_show(struct device * dev,struct device_attribute * attr,char * buf)640 static ssize_t writeback_limit_show(struct device *dev,
641 struct device_attribute *attr, char *buf)
642 {
643 u64 val;
644 struct zram *zram = dev_to_zram(dev);
645
646 guard(rwsem_read)(&zram->dev_lock);
647 val = zram->bd_wb_limit;
648
649 return sysfs_emit(buf, "%llu\n", val);
650 }
651
writeback_batch_size_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)652 static ssize_t writeback_batch_size_store(struct device *dev,
653 struct device_attribute *attr,
654 const char *buf, size_t len)
655 {
656 struct zram *zram = dev_to_zram(dev);
657 u32 val;
658
659 if (kstrtouint(buf, 10, &val))
660 return -EINVAL;
661
662 if (!val)
663 return -EINVAL;
664
665 guard(rwsem_write)(&zram->dev_lock);
666 zram->wb_batch_size = val;
667
668 return len;
669 }
670
writeback_batch_size_show(struct device * dev,struct device_attribute * attr,char * buf)671 static ssize_t writeback_batch_size_show(struct device *dev,
672 struct device_attribute *attr,
673 char *buf)
674 {
675 u32 val;
676 struct zram *zram = dev_to_zram(dev);
677
678 guard(rwsem_read)(&zram->dev_lock);
679 val = zram->wb_batch_size;
680
681 return sysfs_emit(buf, "%u\n", val);
682 }
683
/* Detach the backing device and release writeback state (bitmap, fops) */
static void reset_bdev(struct zram *zram)
{
	if (!zram->backing_dev)
		return;

	/* hope filp_close flush all of IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	/* restore the default fops that don't use the backing device */
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}
697
/* Show the backing device path, or "none" when no device is attached */
static ssize_t backing_dev_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		return 5;
	}

	/* file_path() builds the path at the end of buf, move it forward */
	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p))
		return PTR_ERR(p);

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
	return ret;
}
722
/*
 * Attach a backing block device for writeback. Only allowed before the
 * device is initialized. Allocates the block-usage bitmap sized by the
 * backing device, then commits all state under dev_lock.
 */
static ssize_t backing_dev_store(struct device *dev,
				 struct device_attribute *attr, const char *buf,
				 size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strscpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	/* O_EXCL keeps anybody else from using the device meanwhile */
	backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	inode = backing_dev->f_mapping->host;

	/* Support only block device in this moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	/* Refuse to use zero sized device (also prevents self reference) */
	if (!nr_pages) {
		err = -EINVAL;
		goto out;
	}

	/* one bit per backing-device page, tracks reserved blocks */
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	/* drop any previously attached backing device first */
	reset_bdev(zram);

	zram->bdev = I_BDEV(inode);
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	kfree(file_name);

	return err;
}
803
/*
 * Reserve a free block on the backing device. Returns the block index or
 * INVALID_BDEV_BLOCK when the device is full. The bitmap scan is not
 * locked; presumably only the single writeback task reserves blocks —
 * set_bit itself is atomic.
 */
static unsigned long zram_reserve_bdev_block(struct zram *zram)
{
	unsigned long blk_idx;

	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0);
	if (blk_idx == zram->nr_pages)
		return INVALID_BDEV_BLOCK;

	set_bit(blk_idx, zram->bitmap);
	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}
816
zram_release_bdev_block(struct zram * zram,unsigned long blk_idx)817 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
818 {
819 int was_set;
820
821 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
822 WARN_ON_ONCE(!was_set);
823 atomic64_dec(&zram->stats.bd_count);
824 }
825
/* Free a writeback request and its data page */
static void release_wb_req(struct zram_wb_req *req)
{
	__free_page(req->page);
	kfree(req);
}
831
/* Free the writeback control and all requests parked on the idle list */
static void release_wb_ctl(struct zram_wb_ctl *wb_ctl)
{
	if (!wb_ctl)
		return;

	/* We should never have inflight requests at this point */
	WARN_ON(atomic_read(&wb_ctl->num_inflight));
	WARN_ON(!list_empty(&wb_ctl->done_reqs));

	while (!list_empty(&wb_ctl->idle_reqs)) {
		struct zram_wb_req *req;

		req = list_first_entry(&wb_ctl->idle_reqs,
				       struct zram_wb_req, entry);
		list_del(&req->entry);
		release_wb_req(req);
	}

	kfree(wb_ctl);
}
852
/*
 * Allocate the writeback control and up to wb_batch_size requests.
 * Returns NULL only if nothing could be allocated at all.
 */
static struct zram_wb_ctl *init_wb_ctl(struct zram *zram)
{
	struct zram_wb_ctl *wb_ctl;
	int i;

	wb_ctl = kmalloc_obj(*wb_ctl);
	if (!wb_ctl)
		return NULL;

	INIT_LIST_HEAD(&wb_ctl->idle_reqs);
	INIT_LIST_HEAD(&wb_ctl->done_reqs);
	atomic_set(&wb_ctl->num_inflight, 0);
	init_waitqueue_head(&wb_ctl->done_wait);
	spin_lock_init(&wb_ctl->done_lock);

	for (i = 0; i < zram->wb_batch_size; i++) {
		struct zram_wb_req *req;

		/*
		 * This is fatal condition only if we couldn't allocate
		 * any requests at all. Otherwise we just work with the
		 * requests that we have successfully allocated, so that
		 * writeback can still proceed, even if there is only one
		 * request on the idle list.
		 */
		req = kzalloc_obj(*req, GFP_KERNEL | __GFP_NOWARN);
		if (!req)
			break;

		req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN);
		if (!req->page) {
			kfree(req);
			break;
		}

		list_add(&req->entry, &wb_ctl->idle_reqs);
	}

	/* We couldn't allocate any requests, so writeback is not possible */
	if (list_empty(&wb_ctl->idle_reqs))
		goto release_wb_ctl;

	return wb_ctl;

release_wb_ctl:
	release_wb_ctl(wb_ctl);
	return NULL;
}
901
/* Refund one page worth of writeback budget (e.g. after a failed bio) */
static void zram_account_writeback_rollback(struct zram *zram)
{
	lockdep_assert_held_write(&zram->dev_lock);

	/* bd_wb_limit is accounted in 4K units */
	if (zram->wb_limit_enable)
		zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12);
}
909
/* Charge one page worth of writeback budget at submission time */
static void zram_account_writeback_submit(struct zram *zram)
{
	lockdep_assert_held_write(&zram->dev_lock);

	/* bd_wb_limit is accounted in 4K units */
	if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
		zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
}
917
/*
 * Finalize one writeback request: on bio success turn the slot into a
 * ZRAM_WB slot pointing at the backing-device block, otherwise refund
 * the budget and release the block. Returns the bio error (0 on success).
 */
static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
{
	u32 size, index = req->pps->index;
	int err, prio;
	bool huge;

	err = blk_status_to_errno(req->bio.bi_status);
	if (err) {
		/*
		 * Failed wb requests should not be accounted in wb_limit
		 * (if enabled).
		 */
		zram_account_writeback_rollback(zram);
		zram_release_bdev_block(zram, req->blk_idx);
		return err;
	}

	atomic64_inc(&zram->stats.bd_writes);
	slot_lock(zram, index);
	/*
	 * We release slot lock during writeback so slot can change under us:
	 * slot_free() or slot_free() and zram_write_page(). In both cases
	 * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can
	 * set ZRAM_PP_SLOT on such slots until current post-processing
	 * finishes.
	 */
	if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) {
		zram_release_bdev_block(zram, req->blk_idx);
		goto out;
	}

	if (zram->wb_compressed) {
		/*
		 * ZRAM_WB slots get freed, we need to preserve data required
		 * for read decompression.
		 */
		size = get_slot_size(zram, index);
		prio = get_slot_comp_priority(zram, index);
		huge = test_slot_flag(zram, index, ZRAM_HUGE);
	}

	/* free the in-memory copy, then repoint the slot at the bdev block */
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_WB);
	set_slot_handle(zram, index, req->blk_idx);

	if (zram->wb_compressed) {
		/* size/prio/huge were saved above under the same flag */
		if (huge)
			set_slot_flag(zram, index, ZRAM_HUGE);
		set_slot_size(zram, index, size);
		set_slot_comp_priority(zram, index, prio);
	}

	atomic64_inc(&zram->stats.pages_stored);

out:
	slot_unlock(zram, index);
	return 0;
}
976
/*
 * Bio completion (may run in IRQ context): park the request on the done
 * list and wake the writeback task; actual completion handling happens
 * in zram_writeback_complete().
 */
static void zram_writeback_endio(struct bio *bio)
{
	struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio);
	struct zram_wb_ctl *wb_ctl = bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&wb_ctl->done_lock, flags);
	list_add(&req->entry, &wb_ctl->done_reqs);
	spin_unlock_irqrestore(&wb_ctl->done_lock, flags);

	wake_up(&wb_ctl->done_wait);
}
989
/* Account the budget, track the request as inflight and submit its bio */
static void zram_submit_wb_request(struct zram *zram,
				   struct zram_wb_ctl *wb_ctl,
				   struct zram_wb_req *req)
{
	/*
	 * wb_limit (if enabled) should be adjusted before submission,
	 * so that we don't over-submit.
	 */
	zram_account_writeback_submit(zram);
	atomic_inc(&wb_ctl->num_inflight);
	req->bio.bi_private = wb_ctl;
	submit_bio(&req->bio);
}
1003
/*
 * Drain the done list: finalize each completed request, release its
 * pp-slot and recycle the request onto the idle list. Returns 0 or the
 * most recent completion error.
 */
static int zram_complete_done_reqs(struct zram *zram,
				   struct zram_wb_ctl *wb_ctl)
{
	struct zram_wb_req *req;
	unsigned long flags;
	int ret = 0, err;

	while (atomic_read(&wb_ctl->num_inflight) > 0) {
		spin_lock_irqsave(&wb_ctl->done_lock, flags);
		req = list_first_entry_or_null(&wb_ctl->done_reqs,
					       struct zram_wb_req, entry);
		if (req)
			list_del(&req->entry);
		spin_unlock_irqrestore(&wb_ctl->done_lock, flags);

		/* ->num_inflight > 0 doesn't mean we have done requests */
		if (!req)
			break;

		err = zram_writeback_complete(zram, req);
		if (err)
			ret = err;

		atomic_dec(&wb_ctl->num_inflight);
		release_pp_slot(zram, req->pps);
		req->pps = NULL;

		list_add(&req->entry, &wb_ctl->idle_reqs);
	}

	return ret;
}
1036
zram_select_idle_req(struct zram_wb_ctl * wb_ctl)1037 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl)
1038 {
1039 struct zram_wb_req *req;
1040
1041 req = list_first_entry_or_null(&wb_ctl->idle_reqs,
1042 struct zram_wb_req, entry);
1043 if (req)
1044 list_del(&req->entry);
1045 return req;
1046 }
1047
/*
 * Main writeback loop: for every selected pp-slot reserve a backing
 * block, copy the slot's data into an idle request's page and submit an
 * async WRITE. Completions are reaped to recycle requests. Returns 0 or
 * the most recent error (bio failure, -ENOSPC, -EIO on wb limit).
 */
static int zram_writeback_slots(struct zram *zram,
				struct zram_pp_ctl *ctl,
				struct zram_wb_ctl *wb_ctl)
{
	unsigned long blk_idx = INVALID_BDEV_BLOCK;
	struct zram_wb_req *req = NULL;
	struct zram_pp_slot *pps;
	int ret = 0, err = 0;
	u32 index = 0;

	while ((pps = select_pp_slot(ctl))) {
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			ret = -EIO;
			break;
		}

		/* Grab an idle request, reaping completions until one frees up */
		while (!req) {
			req = zram_select_idle_req(wb_ctl);
			if (req)
				break;

			wait_event(wb_ctl->done_wait,
				   !list_empty(&wb_ctl->done_reqs));

			err = zram_complete_done_reqs(zram, wb_ctl);
			/*
			 * BIO errors are not fatal, we continue and simply
			 * attempt to writeback the remaining objects (pages).
			 * At the same time we need to signal user-space that
			 * some writes (at least one, but also could be all of
			 * them) were not successful and we do so by returning
			 * the most recent BIO error.
			 */
			if (err)
				ret = err;
		}

		/* blk_idx may be carried over from a skipped slot */
		if (blk_idx == INVALID_BDEV_BLOCK) {
			blk_idx = zram_reserve_bdev_block(zram);
			if (blk_idx == INVALID_BDEV_BLOCK) {
				ret = -ENOSPC;
				break;
			}
		}

		index = pps->index;
		slot_lock(zram, index);
		/*
		 * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so
		 * slots can change in the meantime. If slots are accessed or
		 * freed they lose ZRAM_PP_SLOT flag and hence we don't
		 * post-process them.
		 */
		if (!test_slot_flag(zram, index, ZRAM_PP_SLOT))
			goto next;
		if (zram->wb_compressed)
			err = read_from_zspool_raw(zram, req->page, index);
		else
			err = read_from_zspool(zram, req->page, index);
		if (err)
			goto next;
		slot_unlock(zram, index);

		/*
		 * From now on pp-slot is owned by the req, remove it from
		 * its pp bucket.
		 */
		list_del_init(&pps->entry);

		req->blk_idx = blk_idx;
		req->pps = pps;
		bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE);
		req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
		req->bio.bi_end_io = zram_writeback_endio;
		__bio_add_page(&req->bio, req->page, PAGE_SIZE, 0);

		zram_submit_wb_request(zram, wb_ctl, req);
		blk_idx = INVALID_BDEV_BLOCK;
		req = NULL;
		cond_resched();
		continue;

next:
		slot_unlock(zram, index);
		release_pp_slot(zram, pps);
	}

	/*
	 * Selected idle req, but never submitted it due to some error or
	 * wb limit.
	 */
	if (req)
		release_wb_req(req);

	/* Wait for and reap all outstanding submissions */
	while (atomic_read(&wb_ctl->num_inflight) > 0) {
		wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs));
		err = zram_complete_done_reqs(zram, wb_ctl);
		if (err)
			ret = err;
	}

	return ret;
}
1151
1152 #define PAGE_WRITEBACK 0
1153 #define HUGE_WRITEBACK (1 << 0)
1154 #define IDLE_WRITEBACK (1 << 1)
1155 #define INCOMPRESSIBLE_WRITEBACK (1 << 2)
1156
parse_page_index(char * val,unsigned long nr_pages,unsigned long * lo,unsigned long * hi)1157 static int parse_page_index(char *val, unsigned long nr_pages,
1158 unsigned long *lo, unsigned long *hi)
1159 {
1160 int ret;
1161
1162 ret = kstrtoul(val, 10, lo);
1163 if (ret)
1164 return ret;
1165 if (*lo >= nr_pages)
1166 return -ERANGE;
1167 *hi = *lo + 1;
1168 return 0;
1169 }
1170
parse_page_indexes(char * val,unsigned long nr_pages,unsigned long * lo,unsigned long * hi)1171 static int parse_page_indexes(char *val, unsigned long nr_pages,
1172 unsigned long *lo, unsigned long *hi)
1173 {
1174 char *delim;
1175 int ret;
1176
1177 delim = strchr(val, '-');
1178 if (!delim)
1179 return -EINVAL;
1180
1181 *delim = 0x00;
1182 ret = kstrtoul(val, 10, lo);
1183 if (ret)
1184 return ret;
1185 if (*lo >= nr_pages)
1186 return -ERANGE;
1187
1188 ret = kstrtoul(delim + 1, 10, hi);
1189 if (ret)
1190 return ret;
1191 if (*hi >= nr_pages || *lo > *hi)
1192 return -ERANGE;
1193 *hi += 1;
1194 return 0;
1195 }
1196
/*
 * Translate a writeback mode name into its mode bitmask.
 * Returns -EINVAL for unrecognized names.
 */
static int parse_mode(char *val, u32 *mode)
{
	if (!strcmp(val, "idle"))
		*mode = IDLE_WRITEBACK;
	else if (!strcmp(val, "huge"))
		*mode = HUGE_WRITEBACK;
	else if (!strcmp(val, "huge_idle"))
		*mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
	else if (!strcmp(val, "incompressible"))
		*mode = INCOMPRESSIBLE_WRITEBACK;
	else
		*mode = 0;

	return *mode ? 0 : -EINVAL;
}
1214
/*
 * Scan slots in [lo, hi) and collect writeback candidates matching @mode
 * into @ctl.  Slots already written back or same-filled are never
 * candidates.  Always returns 0.
 */
static int scan_slots_for_writeback(struct zram *zram, u32 mode,
				    unsigned long lo, unsigned long hi,
				    struct zram_pp_ctl *ctl)
{
	u32 index = lo;

	while (index < hi) {
		bool ok = true;

		slot_lock(zram, index);
		if (!slot_allocated(zram, index))
			goto next;

		if (test_slot_flag(zram, index, ZRAM_WB) ||
		    test_slot_flag(zram, index, ZRAM_SAME))
			goto next;

		/* Each requested mode bit adds a required slot flag */
		if (mode & IDLE_WRITEBACK &&
		    !test_slot_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode & HUGE_WRITEBACK &&
		    !test_slot_flag(zram, index, ZRAM_HUGE))
			goto next;
		if (mode & INCOMPRESSIBLE_WRITEBACK &&
		    !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		ok = place_pp_slot(zram, ctl, index);
next:
		slot_unlock(zram, index);
		/* place_pp_slot() failure terminates the scan */
		if (!ok)
			break;
		index++;
	}

	return 0;
}
1252
writeback_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)1253 static ssize_t writeback_store(struct device *dev,
1254 struct device_attribute *attr,
1255 const char *buf, size_t len)
1256 {
1257 struct zram *zram = dev_to_zram(dev);
1258 u64 nr_pages = zram->disksize >> PAGE_SHIFT;
1259 unsigned long lo = 0, hi = nr_pages;
1260 struct zram_pp_ctl *pp_ctl = NULL;
1261 struct zram_wb_ctl *wb_ctl = NULL;
1262 char *args, *param, *val;
1263 ssize_t ret = len;
1264 int err, mode = 0;
1265
1266 guard(rwsem_write)(&zram->dev_lock);
1267 if (!init_done(zram))
1268 return -EINVAL;
1269
1270 if (!zram->backing_dev)
1271 return -ENODEV;
1272
1273 pp_ctl = init_pp_ctl();
1274 if (!pp_ctl)
1275 return -ENOMEM;
1276
1277 wb_ctl = init_wb_ctl(zram);
1278 if (!wb_ctl) {
1279 ret = -ENOMEM;
1280 goto out;
1281 }
1282
1283 args = skip_spaces(buf);
1284 while (*args) {
1285 args = next_arg(args, ¶m, &val);
1286
1287 /*
1288 * Workaround to support the old writeback interface.
1289 *
1290 * The old writeback interface has a minor inconsistency and
1291 * requires key=value only for page_index parameter, while the
1292 * writeback mode is a valueless parameter.
1293 *
1294 * This is not the case anymore and now all parameters are
1295 * required to have values, however, we need to support the
1296 * legacy writeback interface format so we check if we can
1297 * recognize a valueless parameter as the (legacy) writeback
1298 * mode.
1299 */
1300 if (!val || !*val) {
1301 err = parse_mode(param, &mode);
1302 if (err) {
1303 ret = err;
1304 goto out;
1305 }
1306
1307 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1308 break;
1309 }
1310
1311 if (!strcmp(param, "type")) {
1312 err = parse_mode(val, &mode);
1313 if (err) {
1314 ret = err;
1315 goto out;
1316 }
1317
1318 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1319 break;
1320 }
1321
1322 if (!strcmp(param, "page_index")) {
1323 err = parse_page_index(val, nr_pages, &lo, &hi);
1324 if (err) {
1325 ret = err;
1326 goto out;
1327 }
1328
1329 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1330 continue;
1331 }
1332
1333 if (!strcmp(param, "page_indexes")) {
1334 err = parse_page_indexes(val, nr_pages, &lo, &hi);
1335 if (err) {
1336 ret = err;
1337 goto out;
1338 }
1339
1340 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1341 continue;
1342 }
1343 }
1344
1345 err = zram_writeback_slots(zram, pp_ctl, wb_ctl);
1346 if (err)
1347 ret = err;
1348
1349 out:
1350 release_pp_ctl(zram, pp_ctl);
1351 release_wb_ctl(wb_ctl);
1352
1353 return ret;
1354 }
1355
/*
 * Decompress, in place, a page that was just read from the backing device.
 * Returns 0 on success; on a stale read (slot no longer ZRAM_WB) the page
 * is zeroed out and -EIO is returned.
 */
static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned int size;
	int ret, prio;
	void *src;

	slot_lock(zram, index);
	/* Since slot was unlocked we need to make sure it's still ZRAM_WB */
	if (!test_slot_flag(zram, index, ZRAM_WB)) {
		slot_unlock(zram, index);
		/* We read some stale data, zero it out */
		memset_page(page, 0, 0, PAGE_SIZE);
		return -EIO;
	}

	/* ZRAM_HUGE data is stored uncompressed: the raw read is final */
	if (test_slot_flag(zram, index, ZRAM_HUGE)) {
		slot_unlock(zram, index);
		return 0;
	}

	size = get_slot_size(zram, index);
	prio = get_slot_comp_priority(zram, index);

	/* Decompress into the stream buffer, then copy back into the page */
	zstrm = zcomp_stream_get(zram->comps[prio]);
	src = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size,
			       zstrm->local_copy);
	if (!ret)
		copy_page(src, zstrm->local_copy);
	kunmap_local(src);
	zcomp_stream_put(zstrm);
	slot_unlock(zram, index);

	return ret;
}
1392
/*
 * Workqueue callback: decompress a backing-device read in a preemptible
 * context, then complete the parent bio and release the request.
 */
static void zram_deferred_decompress(struct work_struct *w)
{
	struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
	struct page *page = bio_first_page_all(req->bio);
	struct zram *zram = req->zram;
	u32 index = req->index;
	int ret;

	ret = decompress_bdev_page(zram, page, index);
	if (ret)
		req->parent->bi_status = BLK_STS_IOERR;

	/* Decrement parent's ->remaining */
	bio_endio(req->parent);
	bio_put(req->bio);
	kfree(req);
}
1410
/*
 * Completion handler for async backing-device reads.  On error, or when
 * writeback stores data uncompressed, the parent bio is completed right
 * here; otherwise decompression is deferred to a workqueue.
 */
static void zram_async_read_endio(struct bio *bio)
{
	struct zram_rb_req *req = bio->bi_private;
	struct zram *zram = req->zram;

	if (bio->bi_status) {
		/* Propagate the read error to the parent bio */
		req->parent->bi_status = bio->bi_status;
		bio_endio(req->parent);
		bio_put(bio);
		kfree(req);
		return;
	}

	/*
	 * NOTE: zram_async_read_endio() is not exactly right place for this.
	 * Ideally, we need to do it after ZRAM_WB check, but this requires
	 * us to use wq path even on systems that don't enable compressed
	 * writeback, because we cannot take slot-lock in the current context.
	 *
	 * Keep the existing behavior for now.
	 */
	if (zram->wb_compressed == false) {
		/* No decompression needed, complete the parent IO */
		bio_endio(req->parent);
		bio_put(bio);
		kfree(req);
		return;
	}

	/*
	 * zram decompression is sleepable, so we need to deffer it to
	 * a preemptible context.
	 */
	INIT_WORK(&req->work, zram_deferred_decompress);
	queue_work(system_highpri_wq, &req->work);
}
1447
/*
 * Submit an async one-page read from the backing device, chained to
 * @parent.
 *
 * NOTE(review): on req/bio allocation failure this returns silently and
 * the parent's remaining count is never bumped — presumably the parent
 * then completes without this page's data; confirm callers tolerate that.
 */
static void read_from_bdev_async(struct zram *zram, struct page *page,
				 u32 index, unsigned long blk_idx,
				 struct bio *parent)
{
	struct zram_rb_req *req;
	struct bio *bio;

	req = kmalloc_obj(*req, GFP_NOIO);
	if (!req)
		return;

	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
	if (!bio) {
		kfree(req);
		return;
	}

	req->zram = zram;
	req->index = index;
	req->blk_idx = blk_idx;
	req->bio = bio;
	req->parent = parent;

	/* One backing-device block maps to exactly one PAGE_SIZE chunk */
	bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
	bio->bi_private = req;
	bio->bi_end_io = zram_async_read_endio;

	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_inc_remaining(parent);
	submit_bio(bio);
}
1479
zram_sync_read(struct work_struct * w)1480 static void zram_sync_read(struct work_struct *w)
1481 {
1482 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
1483 struct bio_vec bv;
1484 struct bio bio;
1485
1486 bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ);
1487 bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
1488 __bio_add_page(&bio, req->page, PAGE_SIZE, 0);
1489 req->error = submit_bio_wait(&bio);
1490 }
1491
/*
 * Block layer want one ->submit_bio to be active at a time, so if we use
 * chained IO with parent IO in same context, it's a deadlock. To avoid that,
 * use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index,
			       unsigned long blk_idx)
{
	struct zram_rb_req req;

	req.page = page;
	req.zram = zram;
	req.blk_idx = blk_idx;

	/* Run the actual bio submission from a workqueue and wait for it */
	INIT_WORK_ONSTACK(&req.work, zram_sync_read);
	queue_work(system_dfl_wq, &req.work);
	flush_work(&req.work);
	destroy_work_on_stack(&req.work);

	/* Raw data is final unless writeback stored compressed objects */
	if (req.error || zram->wb_compressed == false)
		return req.error;

	return decompress_bdev_page(zram, page, index);
}
1516
/*
 * Read one page from the backing device: async (chained to @parent) when a
 * parent bio is given, otherwise synchronously (partial IO path only).
 */
static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
			  unsigned long blk_idx, struct bio *parent)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (!parent) {
		/*
		 * The config symbol needs the CONFIG_ prefix: IS_ENABLED()
		 * on the bare name always evaluates to 0, which made every
		 * sync read WARN and fail with -EIO.
		 */
		if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_ZRAM_PARTIAL_IO)))
			return -EIO;
		return read_from_bdev_sync(zram, page, index, blk_idx);
	}
	read_from_bdev_async(zram, page, index, blk_idx, parent);
	return 0;
}
1529 #else
/* CONFIG_ZRAM_WRITEBACK=n stubs: no backing-device support */
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
			  unsigned long blk_idx, struct bio *parent)
{
	return -EIO;
}

static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
{
}
1540 #endif
1541
1542 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1543
1544 static struct dentry *zram_debugfs_root;
1545
/* Create/remove the debugfs root directory shared by all zram devices */
static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}
1555
/*
 * debugfs "block_state" read: emit one line per allocated slot starting
 * at *ppos, with the slot's flags rendered as single characters.  *ppos
 * advances past every fully emitted slot so subsequent reads resume where
 * the previous one stopped.
 */
static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	guard(rwsem_read)(&zram->dev_lock);
	if (!init_done(zram)) {
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		slot_lock(zram, index);
		if (!slot_allocated(zram, index))
			goto next;

		copied = snprintf(kbuf + written, count,
			"%12zd %12u.%06d %c%c%c%c%c%c\n",
			index, zram->table[index].attr.ac_time, 0,
			test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
			get_slot_comp_priority(zram, index) ? 'r' : '.',
			test_slot_flag(zram, index,
				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');

		/* Line didn't fit: don't account it and don't advance *ppos */
		if (count <= copied) {
			slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		slot_unlock(zram, index);
		*ppos += 1;
	}

	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}
1609
/* file_operations for the per-device debugfs "block_state" file */
static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};
1615
/* Create the per-device debugfs directory and its "block_state" file */
static void zram_debugfs_register(struct zram *zram)
{
	/* Root directory creation may have failed; skip silently then */
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
					       zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
			    zram, &proc_zram_block_state_op);
}
1626
/* Tear down the per-device debugfs directory */
static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
1631 #else
/* CONFIG_ZRAM_MEMORY_TRACKING=n stubs */
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
1636 #endif
1637
/*
 * Install @alg as the algorithm name for priority @prio, releasing the
 * previously stored name.  Takes ownership of @alg.
 */
static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	/* Do not free statically defined compression algorithms */
	if (zram->comp_algs[prio] != default_compressor)
		kfree(zram->comp_algs[prio]);

	zram->comp_algs[prio] = alg;
}
1646
/*
 * Validate and install a compression algorithm name for priority @prio.
 * The name is duplicated, a trailing newline (sysfs input) stripped, and
 * the change is refused once the device has been initialized.
 */
static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	size_t sz = strlen(buf);
	char *algo;

	if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
		return -E2BIG;

	algo = kstrdup(buf, GFP_KERNEL);
	if (!algo)
		return -ENOMEM;

	/* ignore trailing newline */
	if (sz > 0 && algo[sz - 1] == '\n')
		algo[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(algo)) {
		kfree(algo);
		return -EINVAL;
	}

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		kfree(algo);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, algo);
	return 0;
}
1679
/* Drop the dictionary and reset all tunables for priority @prio. */
static void comp_params_reset(struct zram *zram, u32 prio)
{
	struct zcomp_params *params = &zram->params[prio];

	vfree(params->dict);
	params->dict = NULL;
	params->dict_sz = 0;
	params->level = ZCOMP_PARAM_NOT_SET;
	params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
}
1690
/*
 * Store level/dictionary/deflate tunables for priority @prio, replacing
 * any previous settings.  The dictionary (if any) is loaded from
 * @dict_path.
 */
static int comp_params_store(struct zram *zram, u32 prio, s32 level,
			     const char *dict_path,
			     struct deflate_params *deflate_params)
{
	struct zcomp_params *params = &zram->params[prio];
	ssize_t dict_sz = 0;

	comp_params_reset(zram, prio);

	if (dict_path) {
		dict_sz = kernel_read_file_from_path(dict_path, 0,
						     &params->dict,
						     INT_MAX,
						     NULL,
						     READING_POLICY);
		if (dict_sz < 0)
			return -EINVAL;
	}

	params->dict_sz = dict_sz;
	params->level = level;
	params->deflate.winbits = deflate_params->winbits;
	return 0;
}
1714
/*
 * sysfs "algorithm_params" store: parse key=value pairs (priority=,
 * level=, algo=, dict=, deflate.winbits=) and store the compression
 * parameters for the selected priority.  When "algo" is given, the
 * priority is looked up by algorithm name instead of the "priority" value.
 */
static ssize_t algorithm_params_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
	char *args, *param, *val, *algo = NULL, *dict_path = NULL;
	struct deflate_params deflate_params;
	struct zram *zram = dev_to_zram(dev);
	int ret;

	deflate_params.winbits = ZCOMP_PARAM_NOT_SET;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		/* Every parameter requires a value */
		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "level")) {
			ret = kstrtoint(val, 10, &level);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "dict")) {
			dict_path = val;
			continue;
		}

		if (!strcmp(param, "deflate.winbits")) {
			ret = kstrtoint(val, 10, &deflate_params.winbits);
			if (ret)
				return ret;
			continue;
		}
	}

	/* Lookup priority by algorithm name */
	if (algo) {
		s32 p;

		prio = -EINVAL;
		for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) {
			if (!zram->comp_algs[p])
				continue;

			if (!strcmp(zram->comp_algs[p], algo)) {
				prio = p;
				break;
			}
		}
	}

	if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
	return ret ? ret : len;
}
1789
comp_algorithm_show(struct device * dev,struct device_attribute * attr,char * buf)1790 static ssize_t comp_algorithm_show(struct device *dev,
1791 struct device_attribute *attr,
1792 char *buf)
1793 {
1794 struct zram *zram = dev_to_zram(dev);
1795 ssize_t sz;
1796
1797 guard(rwsem_read)(&zram->dev_lock);
1798 sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
1799 return sz;
1800 }
1801
comp_algorithm_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)1802 static ssize_t comp_algorithm_store(struct device *dev,
1803 struct device_attribute *attr,
1804 const char *buf,
1805 size_t len)
1806 {
1807 struct zram *zram = dev_to_zram(dev);
1808 int ret;
1809
1810 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
1811 return ret ? ret : len;
1812 }
1813
1814 #ifdef CONFIG_ZRAM_MULTI_COMP
/* List every configured secondary (recompression) algorithm per priority */
static ssize_t recomp_algorithm_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz = 0;
	u32 prio;

	guard(rwsem_read)(&zram->dev_lock);
	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		/* Unconfigured secondary slots are skipped */
		if (!zram->comp_algs[prio])
			continue;

		sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
		sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
	}
	return sz;
}
1833
/*
 * Set a secondary (recompression) algorithm: parses "algo=" (required)
 * and "priority=" (defaults to ZRAM_SECONDARY_COMP) key=value pairs.
 */
static ssize_t recomp_algorithm_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int prio = ZRAM_SECONDARY_COMP;
	char *args, *param, *val;
	char *alg = NULL;
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "algo")) {
			alg = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}
	}

	if (!alg)
		return -EINVAL;

	/* Only secondary slots may be set through this attribute */
	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = __comp_algorithm_store(zram, prio, alg);
	return ret ? ret : len;
}
1874 #endif
1875
compact_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)1876 static ssize_t compact_store(struct device *dev, struct device_attribute *attr,
1877 const char *buf, size_t len)
1878 {
1879 struct zram *zram = dev_to_zram(dev);
1880
1881 guard(rwsem_read)(&zram->dev_lock);
1882 if (!init_done(zram))
1883 return -EINVAL;
1884
1885 zs_compact(zram->mem_pool);
1886
1887 return len;
1888 }
1889
io_stat_show(struct device * dev,struct device_attribute * attr,char * buf)1890 static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr,
1891 char *buf)
1892 {
1893 struct zram *zram = dev_to_zram(dev);
1894 ssize_t ret;
1895
1896 guard(rwsem_read)(&zram->dev_lock);
1897 ret = sysfs_emit(buf,
1898 "%8llu %8llu 0 %8llu\n",
1899 (u64)atomic64_read(&zram->stats.failed_reads),
1900 (u64)atomic64_read(&zram->stats.failed_writes),
1901 (u64)atomic64_read(&zram->stats.notify_free));
1902
1903 return ret;
1904 }
1905
/*
 * sysfs "mm_stat" show: memory-related statistics.  Pool-derived numbers
 * (mem_used, pages_compacted) report zero while the device is not
 * initialized.
 */
static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	guard(rwsem_read)(&zram->dev_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = sysfs_emit(buf,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			atomic_long_read(&pool_stats.pages_compacted),
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));

	return ret;
}
1940
debug_stat_show(struct device * dev,struct device_attribute * attr,char * buf)1941 static ssize_t debug_stat_show(struct device *dev,
1942 struct device_attribute *attr, char *buf)
1943 {
1944 int version = 1;
1945 struct zram *zram = dev_to_zram(dev);
1946 ssize_t ret;
1947
1948 guard(rwsem_read)(&zram->dev_lock);
1949 ret = sysfs_emit(buf,
1950 "version: %d\n0 %8llu\n",
1951 version,
1952 (u64)atomic64_read(&zram->stats.miss_free));
1953
1954 return ret;
1955 }
1956
/*
 * Tear down device metadata: release every slot, destroy the zsmalloc
 * pool and free the slot table.  Safe to call when the table was never
 * allocated.
 */
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t slot, nr_slots = disksize >> PAGE_SHIFT;

	if (!zram->table)
		return;

	/* Release every object still owned by this device */
	for (slot = 0; slot < nr_slots; slot++)
		slot_free(zram, slot);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
	zram->table = NULL;
}
1973
zram_meta_alloc(struct zram * zram,u64 disksize)1974 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1975 {
1976 size_t num_pages, index;
1977
1978 num_pages = disksize >> PAGE_SHIFT;
1979 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1980 if (!zram->table)
1981 return false;
1982
1983 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1984 if (!zram->mem_pool) {
1985 vfree(zram->table);
1986 zram->table = NULL;
1987 return false;
1988 }
1989
1990 if (!huge_class_size)
1991 huge_class_size = zs_huge_class_size(zram->mem_pool);
1992
1993 for (index = 0; index < num_pages; index++)
1994 slot_lock_init(zram, index);
1995
1996 return true;
1997 }
1998
/*
 * Release everything a slot owns (zsmalloc object, backing-device block,
 * or same-fill marker) and clear all of its flags and size/handle fields.
 */
static void slot_free(struct zram *zram, u32 index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].attr.ac_time = 0;
#endif

	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
	set_slot_comp_priority(zram, index, 0);

	if (test_slot_flag(zram, index, ZRAM_HUGE)) {
		clear_slot_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	/* Written-back slots store a bdev block, not a zsmalloc handle */
	if (test_slot_flag(zram, index, ZRAM_WB)) {
		clear_slot_flag(zram, index, ZRAM_WB);
		zram_release_bdev_block(zram, get_slot_handle(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (test_slot_flag(zram, index, ZRAM_SAME)) {
		clear_slot_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	/* Unallocated slot: nothing to free, leave the counters untouched */
	handle = get_slot_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(get_slot_size(zram, index),
		     &zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	set_slot_handle(zram, index, 0);
	set_slot_size(zram, index, 0);
}
2046
/*
 * Reconstruct a same-filled page.  The slot handle stores the fill
 * pattern itself rather than a zsmalloc handle.
 */
static int read_same_filled_page(struct zram *zram, struct page *page,
				 u32 index)
{
	void *dst = kmap_local_page(page);

	zram_fill_page(dst, PAGE_SIZE, get_slot_handle(zram, index));
	kunmap_local(dst);
	return 0;
}
2057
/*
 * Copy an incompressible (stored uncompressed) object straight into
 * @page; no decompression involved.
 */
static int read_incompressible_page(struct zram *zram, struct page *page,
				    u32 index)
{
	unsigned long handle = get_slot_handle(zram, index);
	void *obj, *dst;

	obj = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL);
	dst = kmap_local_page(page);
	copy_page(dst, obj);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, obj);

	return 0;
}
2073
/*
 * Decompress one slot's object directly into @page, using the stream of
 * the priority the object was compressed with.
 */
static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	int ret, prio;

	handle = get_slot_handle(zram, index);
	size = get_slot_size(zram, index);
	prio = get_slot_comp_priority(zram, index);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	/* local_copy backs objects that span two physical pages */
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	dst = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	return ret;
}
2097
2098 #if defined CONFIG_ZRAM_WRITEBACK
/*
 * Copy a slot's raw (still compressed) object into @page, used by the
 * compressed-writeback path.  Always returns 0.
 */
static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src;

	handle = get_slot_handle(zram, index);
	size = get_slot_size(zram, index);

	/*
	 * We need to get stream just for ->local_copy buffer, in
	 * case if object spans two physical pages. No decompression
	 * takes place here, as we read raw compressed data.
	 */
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	memcpy_to_page(page, 0, src, size);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	return 0;
}
2123 #endif
2124
2125 /*
2126 * Reads (decompresses if needed) a page from zspool (zsmalloc).
2127 * Corresponding ZRAM slot should be locked.
2128 */
read_from_zspool(struct zram * zram,struct page * page,u32 index)2129 static int read_from_zspool(struct zram *zram, struct page *page, u32 index)
2130 {
2131 if (test_slot_flag(zram, index, ZRAM_SAME) ||
2132 !get_slot_handle(zram, index))
2133 return read_same_filled_page(zram, page, index);
2134
2135 if (!test_slot_flag(zram, index, ZRAM_HUGE))
2136 return read_compressed_page(zram, page, index);
2137 else
2138 return read_incompressible_page(zram, page, index);
2139 }
2140
/*
 * Read one page worth of data for @index, either from zsmalloc or from
 * the backing device (async when @parent is given, sync otherwise).
 */
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	slot_lock(zram, index);
	if (!test_slot_flag(zram, index, ZRAM_WB)) {
		/* Slot should be locked through out the function call */
		ret = read_from_zspool(zram, page, index);
		slot_unlock(zram, index);
	} else {
		unsigned long blk_idx = get_slot_handle(zram, index);

		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		slot_unlock(zram, index);
		ret = read_from_bdev(zram, page, index, blk_idx, parent);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
2168
/*
 * Use a temporary buffer to decompress the page, as the decompressor
 * always expects a full page for the output.
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;
	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		/* Copy only the requested sub-page region into the bvec */
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}
2187
zram_bvec_read(struct zram * zram,struct bio_vec * bvec,u32 index,int offset,struct bio * bio)2188 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
2189 u32 index, int offset, struct bio *bio)
2190 {
2191 if (is_partial_io(bvec))
2192 return zram_bvec_read_partial(zram, bvec, index, offset);
2193 return zram_read_page(zram, bvec->bv_page, index, bio);
2194 }
2195
write_same_filled_page(struct zram * zram,unsigned long fill,u32 index)2196 static int write_same_filled_page(struct zram *zram, unsigned long fill,
2197 u32 index)
2198 {
2199 slot_lock(zram, index);
2200 slot_free(zram, index);
2201 set_slot_flag(zram, index, ZRAM_SAME);
2202 set_slot_handle(zram, index, fill);
2203 slot_unlock(zram, index);
2204
2205 atomic64_inc(&zram->stats.same_pages);
2206 atomic64_inc(&zram->stats.pages_stored);
2207
2208 return 0;
2209 }
2210
write_incompressible_page(struct zram * zram,struct page * page,u32 index)2211 static int write_incompressible_page(struct zram *zram, struct page *page,
2212 u32 index)
2213 {
2214 unsigned long handle;
2215 void *src;
2216
2217 /*
2218 * This function is called from preemptible context so we don't need
2219 * to do optimistic and fallback to pessimistic handle allocation,
2220 * like we do for compressible pages.
2221 */
2222 handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
2223 GFP_NOIO | __GFP_NOWARN |
2224 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2225 if (IS_ERR_VALUE(handle))
2226 return PTR_ERR((void *)handle);
2227
2228 if (!zram_can_store_page(zram)) {
2229 zs_free(zram->mem_pool, handle);
2230 return -ENOMEM;
2231 }
2232
2233 src = kmap_local_page(page);
2234 zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
2235 kunmap_local(src);
2236
2237 slot_lock(zram, index);
2238 slot_free(zram, index);
2239 set_slot_flag(zram, index, ZRAM_HUGE);
2240 set_slot_handle(zram, index, handle);
2241 set_slot_size(zram, index, PAGE_SIZE);
2242 slot_unlock(zram, index);
2243
2244 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
2245 atomic64_inc(&zram->stats.huge_pages);
2246 atomic64_inc(&zram->stats.huge_pages_since);
2247 atomic64_inc(&zram->stats.pages_stored);
2248
2249 return 0;
2250 }
2251
zram_write_page(struct zram * zram,struct page * page,u32 index)2252 static int zram_write_page(struct zram *zram, struct page *page, u32 index)
2253 {
2254 int ret = 0;
2255 unsigned long handle;
2256 unsigned int comp_len;
2257 void *mem;
2258 struct zcomp_strm *zstrm;
2259 unsigned long element;
2260 bool same_filled;
2261
2262 mem = kmap_local_page(page);
2263 same_filled = page_same_filled(mem, &element);
2264 kunmap_local(mem);
2265 if (same_filled)
2266 return write_same_filled_page(zram, element, index);
2267
2268 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
2269 mem = kmap_local_page(page);
2270 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
2271 mem, &comp_len);
2272 kunmap_local(mem);
2273
2274 if (unlikely(ret)) {
2275 zcomp_stream_put(zstrm);
2276 pr_err("Compression failed! err=%d\n", ret);
2277 return ret;
2278 }
2279
2280 if (comp_len >= huge_class_size) {
2281 zcomp_stream_put(zstrm);
2282 return write_incompressible_page(zram, page, index);
2283 }
2284
2285 handle = zs_malloc(zram->mem_pool, comp_len,
2286 GFP_NOIO | __GFP_NOWARN |
2287 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2288 if (IS_ERR_VALUE(handle)) {
2289 zcomp_stream_put(zstrm);
2290 return PTR_ERR((void *)handle);
2291 }
2292
2293 if (!zram_can_store_page(zram)) {
2294 zcomp_stream_put(zstrm);
2295 zs_free(zram->mem_pool, handle);
2296 return -ENOMEM;
2297 }
2298
2299 zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
2300 zcomp_stream_put(zstrm);
2301
2302 slot_lock(zram, index);
2303 slot_free(zram, index);
2304 set_slot_handle(zram, index, handle);
2305 set_slot_size(zram, index, comp_len);
2306 slot_unlock(zram, index);
2307
2308 /* Update stats */
2309 atomic64_inc(&zram->stats.pages_stored);
2310 atomic64_add(comp_len, &zram->stats.compr_data_size);
2311
2312 return ret;
2313 }
2314
2315 /*
2316 * This is a partial IO. Read the full page before writing the changes.
2317 */
zram_bvec_write_partial(struct zram * zram,struct bio_vec * bvec,u32 index,int offset,struct bio * bio)2318 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
2319 u32 index, int offset, struct bio *bio)
2320 {
2321 struct page *page = alloc_page(GFP_NOIO);
2322 int ret;
2323
2324 if (!page)
2325 return -ENOMEM;
2326
2327 ret = zram_read_page(zram, page, index, bio);
2328 if (!ret) {
2329 memcpy_from_bvec(page_address(page) + offset, bvec);
2330 ret = zram_write_page(zram, page, index);
2331 }
2332 __free_page(page);
2333 return ret;
2334 }
2335
zram_bvec_write(struct zram * zram,struct bio_vec * bvec,u32 index,int offset,struct bio * bio)2336 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
2337 u32 index, int offset, struct bio *bio)
2338 {
2339 if (is_partial_io(bvec))
2340 return zram_bvec_write_partial(zram, bvec, index, offset, bio);
2341 return zram_write_page(zram, bvec->bv_page, index);
2342 }
2343
2344 #ifdef CONFIG_ZRAM_MULTI_COMP
2345 #define RECOMPRESS_IDLE (1 << 0)
2346 #define RECOMPRESS_HUGE (1 << 1)
2347
scan_slots_for_recompress(struct zram * zram,u32 mode,u32 prio_max,struct zram_pp_ctl * ctl)2348 static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max,
2349 struct zram_pp_ctl *ctl)
2350 {
2351 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
2352 unsigned long index;
2353
2354 for (index = 0; index < nr_pages; index++) {
2355 bool ok = true;
2356
2357 slot_lock(zram, index);
2358 if (!slot_allocated(zram, index))
2359 goto next;
2360
2361 if (mode & RECOMPRESS_IDLE &&
2362 !test_slot_flag(zram, index, ZRAM_IDLE))
2363 goto next;
2364
2365 if (mode & RECOMPRESS_HUGE &&
2366 !test_slot_flag(zram, index, ZRAM_HUGE))
2367 goto next;
2368
2369 if (test_slot_flag(zram, index, ZRAM_WB) ||
2370 test_slot_flag(zram, index, ZRAM_SAME) ||
2371 test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
2372 goto next;
2373
2374 /* Already compressed with same of higher priority */
2375 if (get_slot_comp_priority(zram, index) + 1 >= prio_max)
2376 goto next;
2377
2378 ok = place_pp_slot(zram, ctl, index);
2379 next:
2380 slot_unlock(zram, index);
2381 if (!ok)
2382 break;
2383 }
2384
2385 return 0;
2386 }
2387
2388 /*
2389 * This function will decompress (unless it's ZRAM_HUGE) the page and then
2390 * attempt to compress it using provided compression algorithm priority
2391 * (which is potentially more effective).
2392 *
2393 * Corresponding ZRAM slot should be locked.
2394 */
recompress_slot(struct zram * zram,u32 index,struct page * page,u64 * num_recomp_pages,u32 threshold,u32 prio,u32 prio_max)2395 static int recompress_slot(struct zram *zram, u32 index, struct page *page,
2396 u64 *num_recomp_pages, u32 threshold, u32 prio,
2397 u32 prio_max)
2398 {
2399 struct zcomp_strm *zstrm = NULL;
2400 unsigned long handle_old;
2401 unsigned long handle_new;
2402 unsigned int comp_len_old;
2403 unsigned int comp_len_new;
2404 unsigned int class_index_old;
2405 unsigned int class_index_new;
2406 void *src;
2407 int ret = 0;
2408
2409 handle_old = get_slot_handle(zram, index);
2410 if (!handle_old)
2411 return -EINVAL;
2412
2413 comp_len_old = get_slot_size(zram, index);
2414 /*
2415 * Do not recompress objects that are already "small enough".
2416 */
2417 if (comp_len_old < threshold)
2418 return 0;
2419
2420 ret = read_from_zspool(zram, page, index);
2421 if (ret)
2422 return ret;
2423
2424 /*
2425 * We touched this entry so mark it as non-IDLE. This makes sure that
2426 * we don't preserve IDLE flag and don't incorrectly pick this entry
2427 * for different post-processing type (e.g. writeback).
2428 */
2429 clear_slot_flag(zram, index, ZRAM_IDLE);
2430
2431 class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
2432
2433 prio = max(prio, get_slot_comp_priority(zram, index) + 1);
2434 /*
2435 * Recompression slots scan should not select slots that are
2436 * already compressed with a higher priority algorithm, but
2437 * just in case
2438 */
2439 if (prio >= prio_max)
2440 return 0;
2441
2442 /*
2443 * Iterate the secondary comp algorithms list (in order of priority)
2444 * and try to recompress the page.
2445 */
2446 for (; prio < prio_max; prio++) {
2447 if (!zram->comps[prio])
2448 continue;
2449
2450 zstrm = zcomp_stream_get(zram->comps[prio]);
2451 src = kmap_local_page(page);
2452 ret = zcomp_compress(zram->comps[prio], zstrm,
2453 src, &comp_len_new);
2454 kunmap_local(src);
2455
2456 if (ret) {
2457 zcomp_stream_put(zstrm);
2458 zstrm = NULL;
2459 break;
2460 }
2461
2462 class_index_new = zs_lookup_class_index(zram->mem_pool,
2463 comp_len_new);
2464
2465 /* Continue until we make progress */
2466 if (class_index_new >= class_index_old ||
2467 (threshold && comp_len_new >= threshold)) {
2468 zcomp_stream_put(zstrm);
2469 zstrm = NULL;
2470 continue;
2471 }
2472
2473 /* Recompression was successful so break out */
2474 break;
2475 }
2476
2477 /*
2478 * Decrement the limit (if set) on pages we can recompress, even
2479 * when current recompression was unsuccessful or did not compress
2480 * the page below the threshold, because we still spent resources
2481 * on it.
2482 */
2483 if (*num_recomp_pages)
2484 *num_recomp_pages -= 1;
2485
2486 /* Compression error */
2487 if (ret)
2488 return ret;
2489
2490 if (!zstrm) {
2491 /*
2492 * Secondary algorithms failed to re-compress the page
2493 * in a way that would save memory.
2494 *
2495 * Mark the object incompressible if the max-priority
2496 * algorithm couldn't re-compress it.
2497 */
2498 if (prio < zram->num_active_comps)
2499 return 0;
2500 set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
2501 return 0;
2502 }
2503
2504 /*
2505 * We are holding per-CPU stream mutex and entry lock so better
2506 * avoid direct reclaim. Allocation error is not fatal since
2507 * we still have the old object in the mem_pool.
2508 *
2509 * XXX: technically, the node we really want here is the node that
2510 * holds the original compressed data. But that would require us to
2511 * modify zsmalloc API to return this information. For now, we will
2512 * make do with the node of the page allocated for recompression.
2513 */
2514 handle_new = zs_malloc(zram->mem_pool, comp_len_new,
2515 GFP_NOIO | __GFP_NOWARN |
2516 __GFP_HIGHMEM | __GFP_MOVABLE,
2517 page_to_nid(page));
2518 if (IS_ERR_VALUE(handle_new)) {
2519 zcomp_stream_put(zstrm);
2520 return PTR_ERR((void *)handle_new);
2521 }
2522
2523 zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
2524 zcomp_stream_put(zstrm);
2525
2526 slot_free(zram, index);
2527 set_slot_handle(zram, index, handle_new);
2528 set_slot_size(zram, index, comp_len_new);
2529 set_slot_comp_priority(zram, index, prio);
2530
2531 atomic64_add(comp_len_new, &zram->stats.compr_data_size);
2532 atomic64_inc(&zram->stats.pages_stored);
2533
2534 return 0;
2535 }
2536
recompress_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)2537 static ssize_t recompress_store(struct device *dev,
2538 struct device_attribute *attr,
2539 const char *buf, size_t len)
2540 {
2541 struct zram *zram = dev_to_zram(dev);
2542 char *args, *param, *val, *algo = NULL;
2543 u64 num_recomp_pages = ULLONG_MAX;
2544 struct zram_pp_ctl *ctl = NULL;
2545 struct zram_pp_slot *pps;
2546 u32 mode = 0, threshold = 0;
2547 u32 prio, prio_max;
2548 struct page *page = NULL;
2549 ssize_t ret;
2550
2551 prio = ZRAM_SECONDARY_COMP;
2552 prio_max = zram->num_active_comps;
2553
2554 args = skip_spaces(buf);
2555 while (*args) {
2556 args = next_arg(args, ¶m, &val);
2557
2558 if (!val || !*val)
2559 return -EINVAL;
2560
2561 if (!strcmp(param, "type")) {
2562 if (!strcmp(val, "idle"))
2563 mode = RECOMPRESS_IDLE;
2564 if (!strcmp(val, "huge"))
2565 mode = RECOMPRESS_HUGE;
2566 if (!strcmp(val, "huge_idle"))
2567 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
2568 continue;
2569 }
2570
2571 if (!strcmp(param, "max_pages")) {
2572 /*
2573 * Limit the number of entries (pages) we attempt to
2574 * recompress.
2575 */
2576 ret = kstrtoull(val, 10, &num_recomp_pages);
2577 if (ret)
2578 return ret;
2579 continue;
2580 }
2581
2582 if (!strcmp(param, "threshold")) {
2583 /*
2584 * We will re-compress only idle objects equal or
2585 * greater in size than watermark.
2586 */
2587 ret = kstrtouint(val, 10, &threshold);
2588 if (ret)
2589 return ret;
2590 continue;
2591 }
2592
2593 if (!strcmp(param, "algo")) {
2594 algo = val;
2595 continue;
2596 }
2597
2598 if (!strcmp(param, "priority")) {
2599 ret = kstrtouint(val, 10, &prio);
2600 if (ret)
2601 return ret;
2602
2603 if (prio == ZRAM_PRIMARY_COMP)
2604 prio = ZRAM_SECONDARY_COMP;
2605
2606 prio_max = prio + 1;
2607 continue;
2608 }
2609 }
2610
2611 if (threshold >= huge_class_size)
2612 return -EINVAL;
2613
2614 guard(rwsem_write)(&zram->dev_lock);
2615 if (!init_done(zram))
2616 return -EINVAL;
2617
2618 if (algo) {
2619 bool found = false;
2620
2621 for (; prio < ZRAM_MAX_COMPS; prio++) {
2622 if (!zram->comp_algs[prio])
2623 continue;
2624
2625 if (!strcmp(zram->comp_algs[prio], algo)) {
2626 prio_max = prio + 1;
2627 found = true;
2628 break;
2629 }
2630 }
2631
2632 if (!found) {
2633 ret = -EINVAL;
2634 goto out;
2635 }
2636 }
2637
2638 prio_max = min(prio_max, (u32)zram->num_active_comps);
2639 if (prio >= prio_max) {
2640 ret = -EINVAL;
2641 goto out;
2642 }
2643
2644 page = alloc_page(GFP_KERNEL);
2645 if (!page) {
2646 ret = -ENOMEM;
2647 goto out;
2648 }
2649
2650 ctl = init_pp_ctl();
2651 if (!ctl) {
2652 ret = -ENOMEM;
2653 goto out;
2654 }
2655
2656 scan_slots_for_recompress(zram, mode, prio_max, ctl);
2657
2658 ret = len;
2659 while ((pps = select_pp_slot(ctl))) {
2660 int err = 0;
2661
2662 if (!num_recomp_pages)
2663 break;
2664
2665 slot_lock(zram, pps->index);
2666 if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT))
2667 goto next;
2668
2669 err = recompress_slot(zram, pps->index, page,
2670 &num_recomp_pages, threshold,
2671 prio, prio_max);
2672 next:
2673 slot_unlock(zram, pps->index);
2674 release_pp_slot(zram, pps);
2675
2676 if (err) {
2677 ret = err;
2678 break;
2679 }
2680
2681 cond_resched();
2682 }
2683
2684 out:
2685 if (page)
2686 __free_page(page);
2687 release_pp_ctl(zram, ctl);
2688 return ret;
2689 }
2690 #endif
2691
zram_bio_discard(struct zram * zram,struct bio * bio)2692 static void zram_bio_discard(struct zram *zram, struct bio *bio)
2693 {
2694 size_t n = bio->bi_iter.bi_size;
2695 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2696 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2697 SECTOR_SHIFT;
2698
2699 /*
2700 * zram manages data in physical block size units. Because logical block
2701 * size isn't identical with physical block size on some arch, we
2702 * could get a discard request pointing to a specific offset within a
2703 * certain physical block. Although we can handle this request by
2704 * reading that physiclal block and decompressing and partially zeroing
2705 * and re-compressing and then re-storing it, this isn't reasonable
2706 * because our intent with a discard request is to save memory. So
2707 * skipping this logical block is appropriate here.
2708 */
2709 if (offset) {
2710 if (n <= (PAGE_SIZE - offset))
2711 return;
2712
2713 n -= (PAGE_SIZE - offset);
2714 index++;
2715 }
2716
2717 while (n >= PAGE_SIZE) {
2718 slot_lock(zram, index);
2719 slot_free(zram, index);
2720 slot_unlock(zram, index);
2721 atomic64_inc(&zram->stats.notify_free);
2722 index++;
2723 n -= PAGE_SIZE;
2724 }
2725
2726 bio_endio(bio);
2727 }
2728
zram_bio_read(struct zram * zram,struct bio * bio)2729 static void zram_bio_read(struct zram *zram, struct bio *bio)
2730 {
2731 unsigned long start_time = bio_start_io_acct(bio);
2732 struct bvec_iter iter = bio->bi_iter;
2733
2734 do {
2735 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2736 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2737 SECTOR_SHIFT;
2738 struct bio_vec bv = bio_iter_iovec(bio, iter);
2739
2740 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2741
2742 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
2743 atomic64_inc(&zram->stats.failed_reads);
2744 bio->bi_status = BLK_STS_IOERR;
2745 break;
2746 }
2747 flush_dcache_page(bv.bv_page);
2748
2749 slot_lock(zram, index);
2750 mark_slot_accessed(zram, index);
2751 slot_unlock(zram, index);
2752
2753 bio_advance_iter_single(bio, &iter, bv.bv_len);
2754 } while (iter.bi_size);
2755
2756 bio_end_io_acct(bio, start_time);
2757 bio_endio(bio);
2758 }
2759
zram_bio_write(struct zram * zram,struct bio * bio)2760 static void zram_bio_write(struct zram *zram, struct bio *bio)
2761 {
2762 unsigned long start_time = bio_start_io_acct(bio);
2763 struct bvec_iter iter = bio->bi_iter;
2764
2765 do {
2766 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2767 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2768 SECTOR_SHIFT;
2769 struct bio_vec bv = bio_iter_iovec(bio, iter);
2770
2771 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2772
2773 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
2774 atomic64_inc(&zram->stats.failed_writes);
2775 bio->bi_status = BLK_STS_IOERR;
2776 break;
2777 }
2778
2779 slot_lock(zram, index);
2780 mark_slot_accessed(zram, index);
2781 slot_unlock(zram, index);
2782
2783 bio_advance_iter_single(bio, &iter, bv.bv_len);
2784 } while (iter.bi_size);
2785
2786 bio_end_io_acct(bio, start_time);
2787 bio_endio(bio);
2788 }
2789
2790 /*
2791 * Handler function for all zram I/O requests.
2792 */
zram_submit_bio(struct bio * bio)2793 static void zram_submit_bio(struct bio *bio)
2794 {
2795 struct zram *zram = bio->bi_bdev->bd_disk->private_data;
2796
2797 switch (bio_op(bio)) {
2798 case REQ_OP_READ:
2799 zram_bio_read(zram, bio);
2800 break;
2801 case REQ_OP_WRITE:
2802 zram_bio_write(zram, bio);
2803 break;
2804 case REQ_OP_DISCARD:
2805 case REQ_OP_WRITE_ZEROES:
2806 zram_bio_discard(zram, bio);
2807 break;
2808 default:
2809 WARN_ON_ONCE(1);
2810 bio_endio(bio);
2811 }
2812 }
2813
zram_slot_free_notify(struct block_device * bdev,unsigned long index)2814 static void zram_slot_free_notify(struct block_device *bdev,
2815 unsigned long index)
2816 {
2817 struct zram *zram;
2818
2819 zram = bdev->bd_disk->private_data;
2820
2821 atomic64_inc(&zram->stats.notify_free);
2822 if (!slot_trylock(zram, index)) {
2823 atomic64_inc(&zram->stats.miss_free);
2824 return;
2825 }
2826
2827 slot_free(zram, index);
2828 slot_unlock(zram, index);
2829 }
2830
zram_comp_params_reset(struct zram * zram)2831 static void zram_comp_params_reset(struct zram *zram)
2832 {
2833 u32 prio;
2834
2835 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2836 comp_params_reset(zram, prio);
2837 }
2838 }
2839
zram_destroy_comps(struct zram * zram)2840 static void zram_destroy_comps(struct zram *zram)
2841 {
2842 u32 prio;
2843
2844 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2845 struct zcomp *comp = zram->comps[prio];
2846
2847 zram->comps[prio] = NULL;
2848 if (!comp)
2849 continue;
2850 zcomp_destroy(comp);
2851 zram->num_active_comps--;
2852 }
2853
2854 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2855 /* Do not free statically defined compression algorithms */
2856 if (zram->comp_algs[prio] != default_compressor)
2857 kfree(zram->comp_algs[prio]);
2858 zram->comp_algs[prio] = NULL;
2859 }
2860
2861 zram_comp_params_reset(zram);
2862 }
2863
zram_reset_device(struct zram * zram)2864 static void zram_reset_device(struct zram *zram)
2865 {
2866 guard(rwsem_write)(&zram->dev_lock);
2867
2868 zram->limit_pages = 0;
2869
2870 set_capacity_and_notify(zram->disk, 0);
2871 part_stat_set_all(zram->disk->part0, 0);
2872
2873 /* I/O operation under all of CPU are done so let's free */
2874 zram_meta_free(zram, zram->disksize);
2875 zram->disksize = 0;
2876 zram_destroy_comps(zram);
2877 memset(&zram->stats, 0, sizeof(zram->stats));
2878 reset_bdev(zram);
2879
2880 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2881 }
2882
disksize_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)2883 static ssize_t disksize_store(struct device *dev, struct device_attribute *attr,
2884 const char *buf, size_t len)
2885 {
2886 u64 disksize;
2887 struct zcomp *comp;
2888 struct zram *zram = dev_to_zram(dev);
2889 int err;
2890 u32 prio;
2891
2892 disksize = memparse(buf, NULL);
2893 if (!disksize)
2894 return -EINVAL;
2895
2896 guard(rwsem_write)(&zram->dev_lock);
2897 if (init_done(zram)) {
2898 pr_info("Cannot change disksize for initialized device\n");
2899 return -EBUSY;
2900 }
2901
2902 disksize = PAGE_ALIGN(disksize);
2903 if (!zram_meta_alloc(zram, disksize))
2904 return -ENOMEM;
2905
2906 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2907 if (!zram->comp_algs[prio])
2908 continue;
2909
2910 comp = zcomp_create(zram->comp_algs[prio],
2911 &zram->params[prio]);
2912 if (IS_ERR(comp)) {
2913 pr_err("Cannot initialise %s compressing backend\n",
2914 zram->comp_algs[prio]);
2915 err = PTR_ERR(comp);
2916 goto out_free_comps;
2917 }
2918
2919 zram->comps[prio] = comp;
2920 zram->num_active_comps++;
2921 }
2922 zram->disksize = disksize;
2923 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
2924
2925 return len;
2926
2927 out_free_comps:
2928 zram_destroy_comps(zram);
2929 zram_meta_free(zram, disksize);
2930 return err;
2931 }
2932
reset_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)2933 static ssize_t reset_store(struct device *dev,
2934 struct device_attribute *attr, const char *buf, size_t len)
2935 {
2936 int ret;
2937 unsigned short do_reset;
2938 struct zram *zram;
2939 struct gendisk *disk;
2940
2941 ret = kstrtou16(buf, 10, &do_reset);
2942 if (ret)
2943 return ret;
2944
2945 if (!do_reset)
2946 return -EINVAL;
2947
2948 zram = dev_to_zram(dev);
2949 disk = zram->disk;
2950
2951 mutex_lock(&disk->open_mutex);
2952 /* Do not reset an active device or claimed device */
2953 if (disk_openers(disk) || zram->claim) {
2954 mutex_unlock(&disk->open_mutex);
2955 return -EBUSY;
2956 }
2957
2958 /* From now on, anyone can't open /dev/zram[0-9] */
2959 zram->claim = true;
2960 mutex_unlock(&disk->open_mutex);
2961
2962 /* Make sure all the pending I/O are finished */
2963 sync_blockdev(disk->part0);
2964 zram_reset_device(zram);
2965
2966 mutex_lock(&disk->open_mutex);
2967 zram->claim = false;
2968 mutex_unlock(&disk->open_mutex);
2969
2970 return len;
2971 }
2972
zram_open(struct gendisk * disk,blk_mode_t mode)2973 static int zram_open(struct gendisk *disk, blk_mode_t mode)
2974 {
2975 struct zram *zram = disk->private_data;
2976
2977 WARN_ON(!mutex_is_locked(&disk->open_mutex));
2978
2979 /* zram was claimed to reset so open request fails */
2980 if (zram->claim)
2981 return -EBUSY;
2982 return 0;
2983 }
2984
/* Block device ops; swap_slot_free_notify lets swap reclaim freed slots. */
static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};
2991
/* Per-device sysfs attributes (/sys/block/zram<id>/...) */
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
static DEVICE_ATTR_RW(writeback_batch_size);
static DEVICE_ATTR_RW(writeback_compressed);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
static DEVICE_ATTR_WO(recompress);
#endif
static DEVICE_ATTR_WO(algorithm_params);
3017
/* Attribute group registered with the gendisk in zram_add() */
static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
	&dev_attr_writeback_batch_size.attr,
	&dev_attr_writeback_compressed.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
	&dev_attr_recomp_algorithm.attr,
	&dev_attr_recompress.attr,
#endif
	&dev_attr_algorithm_params.attr,
	NULL,
};

ATTRIBUTE_GROUPS(zram_disk);
3048
3049 /*
3050 * Allocate and initialize new zram device. the function returns
3051 * '>= 0' device_id upon success, and negative value otherwise.
3052 */
zram_add(void)3053 static int zram_add(void)
3054 {
3055 struct queue_limits lim = {
3056 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
3057 /*
3058 * To ensure that we always get PAGE_SIZE aligned and
3059 * n*PAGE_SIZED sized I/O requests.
3060 */
3061 .physical_block_size = PAGE_SIZE,
3062 .io_min = PAGE_SIZE,
3063 .io_opt = PAGE_SIZE,
3064 .max_hw_discard_sectors = UINT_MAX,
3065 /*
3066 * zram_bio_discard() will clear all logical blocks if logical
3067 * block size is identical with physical block size(PAGE_SIZE).
3068 * But if it is different, we will skip discarding some parts of
3069 * logical blocks in the part of the request range which isn't
3070 * aligned to physical block size. So we can't ensure that all
3071 * discarded logical blocks are zeroed.
3072 */
3073 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
3074 .max_write_zeroes_sectors = UINT_MAX,
3075 #endif
3076 .features = BLK_FEAT_STABLE_WRITES |
3077 BLK_FEAT_SYNCHRONOUS,
3078 };
3079 struct zram *zram;
3080 int ret, device_id;
3081
3082 zram = kzalloc_obj(struct zram);
3083 if (!zram)
3084 return -ENOMEM;
3085
3086 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
3087 if (ret < 0)
3088 goto out_free_dev;
3089 device_id = ret;
3090
3091 init_rwsem(&zram->dev_lock);
3092 #ifdef CONFIG_ZRAM_WRITEBACK
3093 zram->wb_batch_size = 32;
3094 zram->wb_compressed = false;
3095 #endif
3096
3097 /* gendisk structure */
3098 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
3099 if (IS_ERR(zram->disk)) {
3100 pr_err("Error allocating disk structure for device %d\n",
3101 device_id);
3102 ret = PTR_ERR(zram->disk);
3103 goto out_free_idr;
3104 }
3105
3106 zram->disk->major = zram_major;
3107 zram->disk->first_minor = device_id;
3108 zram->disk->minors = 1;
3109 zram->disk->flags |= GENHD_FL_NO_PART;
3110 zram->disk->fops = &zram_devops;
3111 zram->disk->private_data = zram;
3112 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
3113 zram_comp_params_reset(zram);
3114 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
3115
3116 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */
3117 set_capacity(zram->disk, 0);
3118 ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
3119 if (ret)
3120 goto out_cleanup_disk;
3121
3122 zram_debugfs_register(zram);
3123 pr_info("Added device: %s\n", zram->disk->disk_name);
3124 return device_id;
3125
3126 out_cleanup_disk:
3127 put_disk(zram->disk);
3128 out_free_idr:
3129 idr_remove(&zram_index_idr, device_id);
3130 out_free_dev:
3131 kfree(zram);
3132 return ret;
3133 }
3134
zram_remove(struct zram * zram)3135 static int zram_remove(struct zram *zram)
3136 {
3137 bool claimed;
3138
3139 mutex_lock(&zram->disk->open_mutex);
3140 if (disk_openers(zram->disk)) {
3141 mutex_unlock(&zram->disk->open_mutex);
3142 return -EBUSY;
3143 }
3144
3145 claimed = zram->claim;
3146 if (!claimed)
3147 zram->claim = true;
3148 mutex_unlock(&zram->disk->open_mutex);
3149
3150 zram_debugfs_unregister(zram);
3151
3152 if (claimed) {
3153 /*
3154 * If we were claimed by reset_store(), del_gendisk() will
3155 * wait until reset_store() is done, so nothing need to do.
3156 */
3157 ;
3158 } else {
3159 /* Make sure all the pending I/O are finished */
3160 sync_blockdev(zram->disk->part0);
3161 zram_reset_device(zram);
3162 }
3163
3164 pr_info("Removed device: %s\n", zram->disk->disk_name);
3165
3166 del_gendisk(zram->disk);
3167
3168 /* del_gendisk drains pending reset_store */
3169 WARN_ON_ONCE(claimed && zram->claim);
3170
3171 /*
3172 * disksize_store() may be called in between zram_reset_device()
3173 * and del_gendisk(), so run the last reset to avoid leaking
3174 * anything allocated with disksize_store()
3175 */
3176 zram_reset_device(zram);
3177
3178 put_disk(zram->disk);
3179 kfree(zram);
3180 return 0;
3181 }
3182
3183 /* zram-control sysfs attributes */
3184
3185 /*
3186 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
3187 * sense that reading from this file does alter the state of your system -- it
3188 * creates a new un-initialized zram device and returns back this device's
3189 * device_id (or an error code if it fails to create a new device).
3190 */
hot_add_show(const struct class * class,const struct class_attribute * attr,char * buf)3191 static ssize_t hot_add_show(const struct class *class,
3192 const struct class_attribute *attr,
3193 char *buf)
3194 {
3195 int ret;
3196
3197 mutex_lock(&zram_index_mutex);
3198 ret = zram_add();
3199 mutex_unlock(&zram_index_mutex);
3200
3201 if (ret < 0)
3202 return ret;
3203 return sysfs_emit(buf, "%d\n", ret);
3204 }
3205 /* This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used */
3206 static struct class_attribute class_attr_hot_add =
3207 __ATTR(hot_add, 0400, hot_add_show, NULL);
3208
hot_remove_store(const struct class * class,const struct class_attribute * attr,const char * buf,size_t count)3209 static ssize_t hot_remove_store(const struct class *class,
3210 const struct class_attribute *attr,
3211 const char *buf,
3212 size_t count)
3213 {
3214 struct zram *zram;
3215 int ret, dev_id;
3216
3217 /* dev_id is gendisk->first_minor, which is `int' */
3218 ret = kstrtoint(buf, 10, &dev_id);
3219 if (ret)
3220 return ret;
3221 if (dev_id < 0)
3222 return -EINVAL;
3223
3224 mutex_lock(&zram_index_mutex);
3225
3226 zram = idr_find(&zram_index_idr, dev_id);
3227 if (zram) {
3228 ret = zram_remove(zram);
3229 if (!ret)
3230 idr_remove(&zram_index_idr, dev_id);
3231 } else {
3232 ret = -ENODEV;
3233 }
3234
3235 mutex_unlock(&zram_index_mutex);
3236 return ret ? ret : count;
3237 }
static CLASS_ATTR_WO(hot_remove);

/* /sys/class/zram-control: global hot_add/hot_remove knobs */
static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.class_groups	= zram_control_class_groups,
};
3251
zram_remove_cb(int id,void * ptr,void * data)3252 static int zram_remove_cb(int id, void *ptr, void *data)
3253 {
3254 WARN_ON_ONCE(zram_remove(ptr));
3255 return 0;
3256 }
3257
destroy_devices(void)3258 static void destroy_devices(void)
3259 {
3260 class_unregister(&zram_control_class);
3261 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
3262 zram_debugfs_destroy();
3263 idr_destroy(&zram_index_idr);
3264 unregister_blkdev(zram_major, "zram");
3265 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3266 }
3267
zram_init(void)3268 static int __init zram_init(void)
3269 {
3270 struct zram_table_entry zram_te;
3271 int ret;
3272
3273 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8);
3274
3275 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
3276 zcomp_cpu_up_prepare, zcomp_cpu_dead);
3277 if (ret < 0)
3278 return ret;
3279
3280 ret = class_register(&zram_control_class);
3281 if (ret) {
3282 pr_err("Unable to register zram-control class\n");
3283 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3284 return ret;
3285 }
3286
3287 zram_debugfs_create();
3288 zram_major = register_blkdev(0, "zram");
3289 if (zram_major <= 0) {
3290 pr_err("Unable to get major number\n");
3291 class_unregister(&zram_control_class);
3292 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3293 return -EBUSY;
3294 }
3295
3296 while (num_devices != 0) {
3297 mutex_lock(&zram_index_mutex);
3298 ret = zram_add();
3299 mutex_unlock(&zram_index_mutex);
3300 if (ret < 0)
3301 goto out_error;
3302 num_devices--;
3303 }
3304
3305 return 0;
3306
3307 out_error:
3308 destroy_devices();
3309 return ret;
3310 }
3311
zram_exit(void)3312 static void __exit zram_exit(void)
3313 {
3314 destroy_devices();
3315 }
3316
3317 module_init(zram_init);
3318 module_exit(zram_exit);
3319
3320 module_param(num_devices, uint, 0);
3321 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
3322
3323 MODULE_LICENSE("Dual BSD/GPL");
3324 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
3325 MODULE_DESCRIPTION("Compressed RAM Block Device");
3326