1 /*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
15 #define pr_fmt(fmt) "zram: " fmt
16
17 #include <linux/module.h>
18 #include <linux/kernel.h>
19 #include <linux/bio.h>
20 #include <linux/bitops.h>
21 #include <linux/blkdev.h>
22 #include <linux/buffer_head.h>
23 #include <linux/device.h>
24 #include <linux/highmem.h>
25 #include <linux/slab.h>
26 #include <linux/backing-dev.h>
27 #include <linux/string.h>
28 #include <linux/vmalloc.h>
29 #include <linux/err.h>
30 #include <linux/idr.h>
31 #include <linux/sysfs.h>
32 #include <linux/debugfs.h>
33 #include <linux/cpuhotplug.h>
34 #include <linux/part_stat.h>
35 #include <linux/kernel_read_file.h>
36 #include <linux/rcupdate.h>
37
38 #include "zram_drv.h"
39
40 static DEFINE_IDR(zram_index_idr);
41 /* idr index must be protected */
42 static DEFINE_MUTEX(zram_index_mutex);
43
44 static int zram_major;
45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
46
47 #define ZRAM_MAX_ALGO_NAME_SZ 128
48
49 /* Module params (documentation at end) */
50 static unsigned int num_devices = 1;
51 /*
52 * Pages that compress to sizes equals or greater than this are stored
53 * uncompressed in memory.
54 */
55 static size_t huge_class_size;
56
57 static const struct block_device_operations zram_devops;
58
59 static void slot_free(struct zram *zram, u32 index);
60 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)
61
/*
 * Initialize the lockdep map of slot @index. All slots share one static
 * lock class key.
 */
static void slot_lock_init(struct zram *zram, u32 index)
{
	static struct lock_class_key __key;

	lockdep_init_map(slot_dep_map(zram, index),
			 "zram->table[index].lock", &__key, 0);
}
69
/*
 * entry locking rules:
 *
 * 1) Lock is exclusive
 *
 * 2) lock() function can sleep waiting for the lock
 *
 * 3) Lock owner can sleep
 *
 * 4) Use TRY lock variant when in atomic context
 *    - must check return value and handle locking failures
 */
slot_trylock(struct zram * zram,u32 index)82 static __must_check bool slot_trylock(struct zram *zram, u32 index)
83 {
84 unsigned long *lock = &zram->table[index].__lock;
85
86 if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
87 mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
88 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
89 return true;
90 }
91
92 return false;
93 }
94
/*
 * Take the lock of slot @index, waiting in TASK_UNINTERRUPTIBLE state
 * until the lock bit can be set.
 */
static void slot_lock(struct zram *zram, u32 index)
{
	unsigned long *lock_word = &zram->table[index].__lock;

	/* Tell lockdep about the attempt before we possibly block */
	mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
	wait_on_bit_lock(lock_word, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
	lock_acquired(slot_dep_map(zram, index), _RET_IP_);
}
103
/*
 * Drop the lock of slot @index: clear the lock bit and wake up anyone
 * waiting on it.
 */
static void slot_unlock(struct zram *zram, u32 index)
{
	unsigned long *lock_word = &zram->table[index].__lock;

	mutex_release(slot_dep_map(zram, index), _RET_IP_);
	clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock_word);
}
111
init_done(struct zram * zram)112 static inline bool init_done(struct zram *zram)
113 {
114 return zram->disksize;
115 }
116
dev_to_zram(struct device * dev)117 static inline struct zram *dev_to_zram(struct device *dev)
118 {
119 return (struct zram *)dev_to_disk(dev)->private_data;
120 }
121
/* Read the handle stored in slot @index */
static unsigned long get_slot_handle(struct zram *zram, u32 index)
{
	unsigned long handle = zram->table[index].handle;

	return handle;
}
126
/* Store @handle in slot @index */
static void set_slot_handle(struct zram *zram, u32 index,
			    unsigned long handle)
{
	zram->table[index].handle = handle;
}
131
test_slot_flag(struct zram * zram,u32 index,enum zram_pageflags flag)132 static bool test_slot_flag(struct zram *zram, u32 index,
133 enum zram_pageflags flag)
134 {
135 return zram->table[index].attr.flags & BIT(flag);
136 }
137
/* Set bit @flag in the attribute flags of slot @index */
static void set_slot_flag(struct zram *zram, u32 index,
			  enum zram_pageflags flag)
{
	unsigned long mask = BIT(flag);

	zram->table[index].attr.flags |= mask;
}
143
/* Clear bit @flag in the attribute flags of slot @index */
static void clear_slot_flag(struct zram *zram, u32 index,
			    enum zram_pageflags flag)
{
	unsigned long mask = BIT(flag);

	zram->table[index].attr.flags &= ~mask;
}
149
get_slot_size(struct zram * zram,u32 index)150 static size_t get_slot_size(struct zram *zram, u32 index)
151 {
152 return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
153 }
154
/*
 * Replace the size part of the attribute flags of slot @index with @size,
 * keeping the bits at and above ZRAM_FLAG_SHIFT as they are.
 */
static void set_slot_size(struct zram *zram, u32 index, size_t size)
{
	unsigned long upper = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].attr.flags = (upper << ZRAM_FLAG_SHIFT) | size;
}
161
slot_allocated(struct zram * zram,u32 index)162 static inline bool slot_allocated(struct zram *zram, u32 index)
163 {
164 return get_slot_size(zram, index) ||
165 test_slot_flag(zram, index, ZRAM_SAME) ||
166 test_slot_flag(zram, index, ZRAM_WB);
167 }
168
/*
 * Record compression priority @prio (truncated to
 * ZRAM_COMP_PRIORITY_MASK) in the attribute flags of slot @index.
 */
static inline void set_slot_comp_priority(struct zram *zram, u32 index,
					  u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Wipe whatever priority was stored before OR-ing in the new one:
	 * the page may have been recompressed already.
	 */
	zram->table[index].attr.flags &=
		~(ZRAM_COMP_PRIORITY_MASK << ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}
181
/* Extract the compression priority stored in the flags of slot @index */
static inline u32 get_slot_comp_priority(struct zram *zram, u32 index)
{
	u32 shifted = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1;

	return shifted & ZRAM_COMP_PRIORITY_MASK;
}
188
/*
 * Note an access to slot @index: the slot stops being ZRAM_IDLE and is no
 * longer a post-processing candidate (ZRAM_PP_SLOT). With access time
 * tracking enabled the boot-time seconds stamp is refreshed too.
 */
static void mark_slot_accessed(struct zram *zram, u32 index)
{
	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds();
#endif
}
197
update_used_max(struct zram * zram,const unsigned long pages)198 static inline void update_used_max(struct zram *zram, const unsigned long pages)
199 {
200 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
201
202 do {
203 if (cur_max >= pages)
204 return;
205 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
206 &cur_max, pages));
207 }
208
zram_can_store_page(struct zram * zram)209 static bool zram_can_store_page(struct zram *zram)
210 {
211 unsigned long alloced_pages;
212
213 alloced_pages = zs_get_total_pages(zram->mem_pool);
214 update_used_max(zram, alloced_pages);
215
216 return !zram->limit_pages || alloced_pages <= zram->limit_pages;
217 }
218
219 #if PAGE_SIZE != 4096
is_partial_io(struct bio_vec * bvec)220 static inline bool is_partial_io(struct bio_vec *bvec)
221 {
222 return bvec->bv_len != PAGE_SIZE;
223 }
224 #define ZRAM_PARTIAL_IO 1
225 #else
is_partial_io(struct bio_vec * bvec)226 static inline bool is_partial_io(struct bio_vec *bvec)
227 {
228 return false;
229 }
230 #endif
231
232 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
233 struct zram_pp_slot {
234 unsigned long index;
235 struct list_head entry;
236 };
237
238 /*
239 * A post-processing bucket is, essentially, a size class, this defines
240 * the range (in bytes) of pp-slots sizes in particular bucket.
241 */
242 #define PP_BUCKET_SIZE_RANGE 64
243 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)
244
245 struct zram_pp_ctl {
246 struct list_head pp_buckets[NUM_PP_BUCKETS];
247 };
248
init_pp_ctl(void)249 static struct zram_pp_ctl *init_pp_ctl(void)
250 {
251 struct zram_pp_ctl *ctl;
252 u32 idx;
253
254 ctl = kmalloc_obj(*ctl);
255 if (!ctl)
256 return NULL;
257
258 for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
259 INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
260 return ctl;
261 }
262
release_pp_slot(struct zram * zram,struct zram_pp_slot * pps)263 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
264 {
265 list_del_init(&pps->entry);
266
267 slot_lock(zram, pps->index);
268 clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
269 slot_unlock(zram, pps->index);
270
271 kfree(pps);
272 }
273
release_pp_ctl(struct zram * zram,struct zram_pp_ctl * ctl)274 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
275 {
276 u32 idx;
277
278 if (!ctl)
279 return;
280
281 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
282 while (!list_empty(&ctl->pp_buckets[idx])) {
283 struct zram_pp_slot *pps;
284
285 pps = list_first_entry(&ctl->pp_buckets[idx],
286 struct zram_pp_slot,
287 entry);
288 release_pp_slot(zram, pps);
289 }
290 }
291
292 kfree(ctl);
293 }
294
place_pp_slot(struct zram * zram,struct zram_pp_ctl * ctl,u32 index)295 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
296 u32 index)
297 {
298 struct zram_pp_slot *pps;
299 u32 bid;
300
301 pps = kmalloc_obj(*pps, GFP_NOIO | __GFP_NOWARN);
302 if (!pps)
303 return false;
304
305 INIT_LIST_HEAD(&pps->entry);
306 pps->index = index;
307
308 bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
309 list_add(&pps->entry, &ctl->pp_buckets[bid]);
310
311 set_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
312 return true;
313 }
314
select_pp_slot(struct zram_pp_ctl * ctl)315 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
316 {
317 struct zram_pp_slot *pps = NULL;
318 s32 idx = NUM_PP_BUCKETS - 1;
319
320 /* The higher the bucket id the more optimal slot post-processing is */
321 while (idx >= 0) {
322 pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
323 struct zram_pp_slot,
324 entry);
325 if (pps)
326 break;
327
328 idx--;
329 }
330 return pps;
331 }
332 #endif
333
/*
 * Fill @len bytes at @ptr with the word pattern @value. @len is expected
 * to be a multiple of sizeof(unsigned long); a one-time warning is
 * emitted if it is not.
 */
static inline void zram_fill_page(void *ptr, unsigned long len,
				  unsigned long value)
{
	unsigned long nr_words = len / sizeof(unsigned long);

	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, nr_words);
}
340
page_same_filled(void * ptr,unsigned long * element)341 static bool page_same_filled(void *ptr, unsigned long *element)
342 {
343 unsigned long *page;
344 unsigned long val;
345 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
346
347 page = (unsigned long *)ptr;
348 val = page[0];
349
350 if (val != page[last_pos])
351 return false;
352
353 for (pos = 1; pos < last_pos; pos++) {
354 if (val != page[pos])
355 return false;
356 }
357
358 *element = val;
359
360 return true;
361 }
362
initstate_show(struct device * dev,struct device_attribute * attr,char * buf)363 static ssize_t initstate_show(struct device *dev, struct device_attribute *attr,
364 char *buf)
365 {
366 u32 val;
367 struct zram *zram = dev_to_zram(dev);
368
369 guard(rwsem_read)(&zram->dev_lock);
370 val = init_done(zram);
371
372 return sysfs_emit(buf, "%u\n", val);
373 }
374
disksize_show(struct device * dev,struct device_attribute * attr,char * buf)375 static ssize_t disksize_show(struct device *dev,
376 struct device_attribute *attr, char *buf)
377 {
378 struct zram *zram = dev_to_zram(dev);
379
380 return sysfs_emit(buf, "%llu\n", zram->disksize);
381 }
382
mem_limit_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)383 static ssize_t mem_limit_store(struct device *dev,
384 struct device_attribute *attr, const char *buf,
385 size_t len)
386 {
387 u64 limit;
388 char *tmp;
389 struct zram *zram = dev_to_zram(dev);
390
391 limit = memparse(buf, &tmp);
392 if (buf == tmp) /* no chars parsed, invalid input */
393 return -EINVAL;
394
395 guard(rwsem_write)(&zram->dev_lock);
396 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
397
398 return len;
399 }
400
mem_used_max_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)401 static ssize_t mem_used_max_store(struct device *dev,
402 struct device_attribute *attr,
403 const char *buf, size_t len)
404 {
405 int err;
406 unsigned long val;
407 struct zram *zram = dev_to_zram(dev);
408
409 err = kstrtoul(buf, 10, &val);
410 if (err || val != 0)
411 return -EINVAL;
412
413 guard(rwsem_read)(&zram->dev_lock);
414 if (init_done(zram)) {
415 atomic_long_set(&zram->stats.max_used_pages,
416 zs_get_total_pages(zram->mem_pool));
417 }
418
419 return len;
420 }
421
422 /*
423 * Mark all pages which are older than or equal to cutoff as IDLE.
424 * Callers should hold the zram init lock in read mode
425 */
mark_idle(struct zram * zram,ktime_t cutoff)426 static void mark_idle(struct zram *zram, ktime_t cutoff)
427 {
428 int is_idle = 1;
429 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
430 int index;
431
432 for (index = 0; index < nr_pages; index++) {
433 /*
434 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
435 * post-processing (recompress, writeback) happens to the
436 * ZRAM_SAME slot.
437 *
438 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
439 */
440 slot_lock(zram, index);
441 if (!slot_allocated(zram, index) ||
442 test_slot_flag(zram, index, ZRAM_WB) ||
443 test_slot_flag(zram, index, ZRAM_SAME)) {
444 slot_unlock(zram, index);
445 continue;
446 }
447
448 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
449 is_idle = !cutoff ||
450 ktime_after(cutoff, zram->table[index].attr.ac_time);
451 #endif
452 if (is_idle)
453 set_slot_flag(zram, index, ZRAM_IDLE);
454 else
455 clear_slot_flag(zram, index, ZRAM_IDLE);
456 slot_unlock(zram, index);
457 }
458 }
459
idle_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)460 static ssize_t idle_store(struct device *dev, struct device_attribute *attr,
461 const char *buf, size_t len)
462 {
463 struct zram *zram = dev_to_zram(dev);
464 ktime_t cutoff = 0;
465
466 if (!sysfs_streq(buf, "all")) {
467 /*
468 * If it did not parse as 'all' try to treat it as an integer
469 * when we have memory tracking enabled.
470 */
471 u32 age_sec;
472
473 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) &&
474 !kstrtouint(buf, 0, &age_sec))
475 cutoff = ktime_sub((u32)ktime_get_boottime_seconds(),
476 age_sec);
477 else
478 return -EINVAL;
479 }
480
481 guard(rwsem_read)(&zram->dev_lock);
482 if (!init_done(zram))
483 return -EINVAL;
484
485 /*
486 * A cutoff of 0 marks everything as idle, this is the
487 * "all" behavior.
488 */
489 mark_idle(zram, cutoff);
490 return len;
491 }
492
493 #ifdef CONFIG_ZRAM_WRITEBACK
494 #define INVALID_BDEV_BLOCK (~0UL)
495
496 static int read_from_zspool_raw(struct zram *zram, struct page *page,
497 u32 index);
498 static int read_from_zspool(struct zram *zram, struct page *page, u32 index);
499
500 struct zram_wb_ctl {
501 /* idle list is accessed only by the writeback task, no concurency */
502 struct list_head idle_reqs;
503 /* done list is accessed concurrently, protect by done_lock */
504 struct list_head done_reqs;
505 wait_queue_head_t done_wait;
506 spinlock_t done_lock;
507 atomic_t num_inflight;
508 struct rcu_head rcu;
509 };
510
511 struct zram_wb_req {
512 unsigned long blk_idx;
513 struct page *page;
514 struct zram_pp_slot *pps;
515 struct bio_vec bio_vec;
516 struct bio bio;
517
518 struct list_head entry;
519 };
520
521 struct zram_rb_req {
522 struct work_struct work;
523 struct zram *zram;
524 struct page *page;
525 /* The read bio for backing device */
526 struct bio *bio;
527 unsigned long blk_idx;
528 union {
529 /* The original bio to complete (async read) */
530 struct bio *parent;
531 /* error status (sync read) */
532 int error;
533 };
534 u32 index;
535 };
536
537 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
bd_stat_show(struct device * dev,struct device_attribute * attr,char * buf)538 static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr,
539 char *buf)
540 {
541 struct zram *zram = dev_to_zram(dev);
542 ssize_t ret;
543
544 guard(rwsem_read)(&zram->dev_lock);
545 ret = sysfs_emit(buf,
546 "%8llu %8llu %8llu\n",
547 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
548 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
549 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
550
551 return ret;
552 }
553
compressed_writeback_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)554 static ssize_t compressed_writeback_store(struct device *dev,
555 struct device_attribute *attr,
556 const char *buf, size_t len)
557 {
558 struct zram *zram = dev_to_zram(dev);
559 bool val;
560
561 if (kstrtobool(buf, &val))
562 return -EINVAL;
563
564 guard(rwsem_write)(&zram->dev_lock);
565 if (init_done(zram)) {
566 return -EBUSY;
567 }
568
569 zram->compressed_wb = val;
570
571 return len;
572 }
573
compressed_writeback_show(struct device * dev,struct device_attribute * attr,char * buf)574 static ssize_t compressed_writeback_show(struct device *dev,
575 struct device_attribute *attr,
576 char *buf)
577 {
578 bool val;
579 struct zram *zram = dev_to_zram(dev);
580
581 guard(rwsem_read)(&zram->dev_lock);
582 val = zram->compressed_wb;
583
584 return sysfs_emit(buf, "%d\n", val);
585 }
586
writeback_limit_enable_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)587 static ssize_t writeback_limit_enable_store(struct device *dev,
588 struct device_attribute *attr,
589 const char *buf, size_t len)
590 {
591 struct zram *zram = dev_to_zram(dev);
592 u64 val;
593
594 if (kstrtoull(buf, 10, &val))
595 return -EINVAL;
596
597 guard(rwsem_write)(&zram->dev_lock);
598 zram->wb_limit_enable = val;
599
600 return len;
601 }
602
writeback_limit_enable_show(struct device * dev,struct device_attribute * attr,char * buf)603 static ssize_t writeback_limit_enable_show(struct device *dev,
604 struct device_attribute *attr,
605 char *buf)
606 {
607 bool val;
608 struct zram *zram = dev_to_zram(dev);
609
610 guard(rwsem_read)(&zram->dev_lock);
611 val = zram->wb_limit_enable;
612
613 return sysfs_emit(buf, "%d\n", val);
614 }
615
writeback_limit_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)616 static ssize_t writeback_limit_store(struct device *dev,
617 struct device_attribute *attr,
618 const char *buf, size_t len)
619 {
620 struct zram *zram = dev_to_zram(dev);
621 u64 val;
622
623 if (kstrtoull(buf, 10, &val))
624 return -EINVAL;
625
626 /*
627 * When the page size is greater than 4KB, if bd_wb_limit is set to
628 * a value that is not page - size aligned, it will cause value
629 * wrapping. For example, when the page size is set to 16KB and
630 * bd_wb_limit is set to 3, a single write - back operation will
631 * cause bd_wb_limit to become -1. Even more terrifying is that
632 * bd_wb_limit is an unsigned number.
633 */
634 val = rounddown(val, PAGE_SIZE / 4096);
635
636 guard(rwsem_write)(&zram->dev_lock);
637 zram->bd_wb_limit = val;
638
639 return len;
640 }
641
writeback_limit_show(struct device * dev,struct device_attribute * attr,char * buf)642 static ssize_t writeback_limit_show(struct device *dev,
643 struct device_attribute *attr, char *buf)
644 {
645 u64 val;
646 struct zram *zram = dev_to_zram(dev);
647
648 guard(rwsem_read)(&zram->dev_lock);
649 val = zram->bd_wb_limit;
650
651 return sysfs_emit(buf, "%llu\n", val);
652 }
653
writeback_batch_size_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)654 static ssize_t writeback_batch_size_store(struct device *dev,
655 struct device_attribute *attr,
656 const char *buf, size_t len)
657 {
658 struct zram *zram = dev_to_zram(dev);
659 u32 val;
660
661 if (kstrtouint(buf, 10, &val))
662 return -EINVAL;
663
664 if (!val)
665 return -EINVAL;
666
667 guard(rwsem_write)(&zram->dev_lock);
668 zram->wb_batch_size = val;
669
670 return len;
671 }
672
writeback_batch_size_show(struct device * dev,struct device_attribute * attr,char * buf)673 static ssize_t writeback_batch_size_show(struct device *dev,
674 struct device_attribute *attr,
675 char *buf)
676 {
677 u32 val;
678 struct zram *zram = dev_to_zram(dev);
679
680 guard(rwsem_read)(&zram->dev_lock);
681 val = zram->wb_batch_size;
682
683 return sysfs_emit(buf, "%u\n", val);
684 }
685
reset_bdev(struct zram * zram)686 static void reset_bdev(struct zram *zram)
687 {
688 if (!zram->backing_dev)
689 return;
690
691 /* hope filp_close flush all of IO */
692 filp_close(zram->backing_dev, NULL);
693 zram->backing_dev = NULL;
694 zram->bdev = NULL;
695 zram->disk->fops = &zram_devops;
696 kvfree(zram->bitmap);
697 zram->bitmap = NULL;
698 }
699
backing_dev_show(struct device * dev,struct device_attribute * attr,char * buf)700 static ssize_t backing_dev_show(struct device *dev,
701 struct device_attribute *attr, char *buf)
702 {
703 struct file *file;
704 struct zram *zram = dev_to_zram(dev);
705 char *p;
706 ssize_t ret;
707
708 guard(rwsem_read)(&zram->dev_lock);
709 file = zram->backing_dev;
710 if (!file) {
711 memcpy(buf, "none\n", 5);
712 return 5;
713 }
714
715 p = file_path(file, buf, PAGE_SIZE - 1);
716 if (IS_ERR(p))
717 return PTR_ERR(p);
718
719 ret = strlen(p);
720 memmove(buf, p, ret);
721 buf[ret++] = '\n';
722 return ret;
723 }
724
backing_dev_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)725 static ssize_t backing_dev_store(struct device *dev,
726 struct device_attribute *attr, const char *buf,
727 size_t len)
728 {
729 char *file_name;
730 size_t sz;
731 struct file *backing_dev = NULL;
732 struct inode *inode;
733 unsigned int bitmap_sz;
734 unsigned long nr_pages, *bitmap = NULL;
735 int err;
736 struct zram *zram = dev_to_zram(dev);
737
738 file_name = kmalloc(PATH_MAX, GFP_KERNEL);
739 if (!file_name)
740 return -ENOMEM;
741
742 guard(rwsem_write)(&zram->dev_lock);
743 if (init_done(zram)) {
744 pr_info("Can't setup backing device for initialized device\n");
745 err = -EBUSY;
746 goto out;
747 }
748
749 strscpy(file_name, buf, PATH_MAX);
750 /* ignore trailing newline */
751 sz = strlen(file_name);
752 if (sz > 0 && file_name[sz - 1] == '\n')
753 file_name[sz - 1] = 0x00;
754
755 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
756 if (IS_ERR(backing_dev)) {
757 err = PTR_ERR(backing_dev);
758 backing_dev = NULL;
759 goto out;
760 }
761
762 inode = backing_dev->f_mapping->host;
763
764 /* Support only block device in this moment */
765 if (!S_ISBLK(inode->i_mode)) {
766 err = -ENOTBLK;
767 goto out;
768 }
769
770 nr_pages = i_size_read(inode) >> PAGE_SHIFT;
771 /* Refuse to use zero sized device (also prevents self reference) */
772 if (!nr_pages) {
773 err = -EINVAL;
774 goto out;
775 }
776
777 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
778 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
779 if (!bitmap) {
780 err = -ENOMEM;
781 goto out;
782 }
783
784 reset_bdev(zram);
785
786 zram->bdev = I_BDEV(inode);
787 zram->backing_dev = backing_dev;
788 zram->bitmap = bitmap;
789 zram->nr_pages = nr_pages;
790
791 pr_info("setup backing device %s\n", file_name);
792 kfree(file_name);
793
794 return len;
795 out:
796 kvfree(bitmap);
797
798 if (backing_dev)
799 filp_close(backing_dev, NULL);
800
801 kfree(file_name);
802
803 return err;
804 }
805
zram_reserve_bdev_block(struct zram * zram)806 static unsigned long zram_reserve_bdev_block(struct zram *zram)
807 {
808 unsigned long blk_idx;
809
810 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0);
811 if (blk_idx == zram->nr_pages)
812 return INVALID_BDEV_BLOCK;
813
814 set_bit(blk_idx, zram->bitmap);
815 atomic64_inc(&zram->stats.bd_count);
816 return blk_idx;
817 }
818
zram_release_bdev_block(struct zram * zram,unsigned long blk_idx)819 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
820 {
821 int was_set;
822
823 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
824 WARN_ON_ONCE(!was_set);
825 atomic64_dec(&zram->stats.bd_count);
826 }
827
release_wb_req(struct zram_wb_req * req)828 static void release_wb_req(struct zram_wb_req *req)
829 {
830 __free_page(req->page);
831 kfree(req);
832 }
833
release_wb_ctl(struct zram_wb_ctl * wb_ctl)834 static void release_wb_ctl(struct zram_wb_ctl *wb_ctl)
835 {
836 if (!wb_ctl)
837 return;
838
839 /* We should never have inflight requests at this point */
840 WARN_ON(atomic_read(&wb_ctl->num_inflight));
841 WARN_ON(!list_empty(&wb_ctl->done_reqs));
842
843 while (!list_empty(&wb_ctl->idle_reqs)) {
844 struct zram_wb_req *req;
845
846 req = list_first_entry(&wb_ctl->idle_reqs,
847 struct zram_wb_req, entry);
848 list_del(&req->entry);
849 release_wb_req(req);
850 }
851
852 kfree_rcu(wb_ctl, rcu);
853 }
854
init_wb_ctl(struct zram * zram)855 static struct zram_wb_ctl *init_wb_ctl(struct zram *zram)
856 {
857 struct zram_wb_ctl *wb_ctl;
858 int i;
859
860 wb_ctl = kmalloc_obj(*wb_ctl);
861 if (!wb_ctl)
862 return NULL;
863
864 INIT_LIST_HEAD(&wb_ctl->idle_reqs);
865 INIT_LIST_HEAD(&wb_ctl->done_reqs);
866 atomic_set(&wb_ctl->num_inflight, 0);
867 init_waitqueue_head(&wb_ctl->done_wait);
868 spin_lock_init(&wb_ctl->done_lock);
869
870 for (i = 0; i < zram->wb_batch_size; i++) {
871 struct zram_wb_req *req;
872
873 /*
874 * This is fatal condition only if we couldn't allocate
875 * any requests at all. Otherwise we just work with the
876 * requests that we have successfully allocated, so that
877 * writeback can still proceed, even if there is only one
878 * request on the idle list.
879 */
880 req = kzalloc_obj(*req, GFP_KERNEL | __GFP_NOWARN);
881 if (!req)
882 break;
883
884 req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN);
885 if (!req->page) {
886 kfree(req);
887 break;
888 }
889
890 list_add(&req->entry, &wb_ctl->idle_reqs);
891 }
892
893 /* We couldn't allocate any requests, so writeabck is not possible */
894 if (list_empty(&wb_ctl->idle_reqs))
895 goto release_wb_ctl;
896
897 return wb_ctl;
898
899 release_wb_ctl:
900 release_wb_ctl(wb_ctl);
901 return NULL;
902 }
903
zram_account_writeback_rollback(struct zram * zram)904 static void zram_account_writeback_rollback(struct zram *zram)
905 {
906 lockdep_assert_held_write(&zram->dev_lock);
907
908 if (zram->wb_limit_enable)
909 zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12);
910 }
911
zram_account_writeback_submit(struct zram * zram)912 static void zram_account_writeback_submit(struct zram *zram)
913 {
914 lockdep_assert_held_write(&zram->dev_lock);
915
916 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
917 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
918 }
919
zram_writeback_complete(struct zram * zram,struct zram_wb_req * req)920 static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
921 {
922 u32 index = req->pps->index;
923 int err;
924
925 err = blk_status_to_errno(req->bio.bi_status);
926 if (err) {
927 /*
928 * Failed wb requests should not be accounted in wb_limit
929 * (if enabled).
930 */
931 zram_account_writeback_rollback(zram);
932 zram_release_bdev_block(zram, req->blk_idx);
933 return err;
934 }
935
936 atomic64_inc(&zram->stats.bd_writes);
937 slot_lock(zram, index);
938 /*
939 * We release slot lock during writeback so slot can change under us:
940 * slot_free() or slot_free() and zram_write_page(). In both cases
941 * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can
942 * set ZRAM_PP_SLOT on such slots until current post-processing
943 * finishes.
944 */
945 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) {
946 zram_release_bdev_block(zram, req->blk_idx);
947 goto out;
948 }
949
950 clear_slot_flag(zram, index, ZRAM_IDLE);
951 if (test_slot_flag(zram, index, ZRAM_HUGE))
952 atomic64_dec(&zram->stats.huge_pages);
953 atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size);
954 zs_free(zram->mem_pool, get_slot_handle(zram, index));
955 set_slot_handle(zram, index, req->blk_idx);
956 set_slot_flag(zram, index, ZRAM_WB);
957
958 out:
959 slot_unlock(zram, index);
960 return 0;
961 }
962
zram_writeback_endio(struct bio * bio)963 static void zram_writeback_endio(struct bio *bio)
964 {
965 struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio);
966 struct zram_wb_ctl *wb_ctl = bio->bi_private;
967 unsigned long flags;
968
969 rcu_read_lock();
970 spin_lock_irqsave(&wb_ctl->done_lock, flags);
971 list_add(&req->entry, &wb_ctl->done_reqs);
972 spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
973
974 wake_up(&wb_ctl->done_wait);
975 rcu_read_unlock();
976 }
977
zram_submit_wb_request(struct zram * zram,struct zram_wb_ctl * wb_ctl,struct zram_wb_req * req)978 static void zram_submit_wb_request(struct zram *zram,
979 struct zram_wb_ctl *wb_ctl,
980 struct zram_wb_req *req)
981 {
982 /*
983 * wb_limit (if enabled) should be adjusted before submission,
984 * so that we don't over-submit.
985 */
986 zram_account_writeback_submit(zram);
987 atomic_inc(&wb_ctl->num_inflight);
988 req->bio.bi_private = wb_ctl;
989 submit_bio(&req->bio);
990 }
991
zram_complete_done_reqs(struct zram * zram,struct zram_wb_ctl * wb_ctl)992 static int zram_complete_done_reqs(struct zram *zram,
993 struct zram_wb_ctl *wb_ctl)
994 {
995 struct zram_wb_req *req;
996 unsigned long flags;
997 int ret = 0, err;
998
999 while (atomic_read(&wb_ctl->num_inflight) > 0) {
1000 spin_lock_irqsave(&wb_ctl->done_lock, flags);
1001 req = list_first_entry_or_null(&wb_ctl->done_reqs,
1002 struct zram_wb_req, entry);
1003 if (req)
1004 list_del(&req->entry);
1005 spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
1006
1007 /* ->num_inflight > 0 doesn't mean we have done requests */
1008 if (!req)
1009 break;
1010
1011 err = zram_writeback_complete(zram, req);
1012 if (err)
1013 ret = err;
1014
1015 atomic_dec(&wb_ctl->num_inflight);
1016 release_pp_slot(zram, req->pps);
1017 req->pps = NULL;
1018
1019 list_add(&req->entry, &wb_ctl->idle_reqs);
1020 }
1021
1022 return ret;
1023 }
1024
zram_select_idle_req(struct zram_wb_ctl * wb_ctl)1025 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl)
1026 {
1027 struct zram_wb_req *req;
1028
1029 req = list_first_entry_or_null(&wb_ctl->idle_reqs,
1030 struct zram_wb_req, entry);
1031 if (req)
1032 list_del(&req->entry);
1033 return req;
1034 }
1035
zram_writeback_slots(struct zram * zram,struct zram_pp_ctl * ctl,struct zram_wb_ctl * wb_ctl)1036 static int zram_writeback_slots(struct zram *zram,
1037 struct zram_pp_ctl *ctl,
1038 struct zram_wb_ctl *wb_ctl)
1039 {
1040 unsigned long blk_idx = INVALID_BDEV_BLOCK;
1041 struct zram_wb_req *req = NULL;
1042 struct zram_pp_slot *pps;
1043 int ret = 0, err = 0;
1044 u32 index = 0;
1045
1046 while ((pps = select_pp_slot(ctl))) {
1047 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
1048 ret = -EIO;
1049 break;
1050 }
1051
1052 while (!req) {
1053 req = zram_select_idle_req(wb_ctl);
1054 if (req)
1055 break;
1056
1057 wait_event(wb_ctl->done_wait,
1058 !list_empty(&wb_ctl->done_reqs));
1059
1060 err = zram_complete_done_reqs(zram, wb_ctl);
1061 /*
1062 * BIO errors are not fatal, we continue and simply
1063 * attempt to writeback the remaining objects (pages).
1064 * At the same time we need to signal user-space that
1065 * some writes (at least one, but also could be all of
1066 * them) were not successful and we do so by returning
1067 * the most recent BIO error.
1068 */
1069 if (err)
1070 ret = err;
1071 }
1072
1073 if (blk_idx == INVALID_BDEV_BLOCK) {
1074 blk_idx = zram_reserve_bdev_block(zram);
1075 if (blk_idx == INVALID_BDEV_BLOCK) {
1076 ret = -ENOSPC;
1077 break;
1078 }
1079 }
1080
1081 index = pps->index;
1082 slot_lock(zram, index);
1083 /*
1084 * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so
1085 * slots can change in the meantime. If slots are accessed or
1086 * freed they lose ZRAM_PP_SLOT flag and hence we don't
1087 * post-process them.
1088 */
1089 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT))
1090 goto next;
1091 if (zram->compressed_wb)
1092 err = read_from_zspool_raw(zram, req->page, index);
1093 else
1094 err = read_from_zspool(zram, req->page, index);
1095 if (err)
1096 goto next;
1097 slot_unlock(zram, index);
1098
1099 /*
1100 * From now on pp-slot is owned by the req, remove it from
1101 * its pp bucket.
1102 */
1103 list_del_init(&pps->entry);
1104
1105 req->blk_idx = blk_idx;
1106 req->pps = pps;
1107 bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE);
1108 req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
1109 req->bio.bi_end_io = zram_writeback_endio;
1110 __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0);
1111
1112 zram_submit_wb_request(zram, wb_ctl, req);
1113 blk_idx = INVALID_BDEV_BLOCK;
1114 req = NULL;
1115 cond_resched();
1116 continue;
1117
1118 next:
1119 slot_unlock(zram, index);
1120 release_pp_slot(zram, pps);
1121 }
1122
1123 /*
1124 * Selected idle req, but never submitted it due to some error or
1125 * wb limit.
1126 */
1127 if (req)
1128 release_wb_req(req);
1129
1130 while (atomic_read(&wb_ctl->num_inflight) > 0) {
1131 wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs));
1132 err = zram_complete_done_reqs(zram, wb_ctl);
1133 if (err)
1134 ret = err;
1135 }
1136
1137 return ret;
1138 }
1139
/*
 * Writeback mode bits, as produced by parse_mode() and consumed by
 * scan_slots_for_writeback(). PAGE_WRITEBACK (no bits set) means that no
 * flag-based filtering is requested.
 */
#define PAGE_WRITEBACK			0x0
#define HUGE_WRITEBACK			0x1
#define IDLE_WRITEBACK			0x2
#define INCOMPRESSIBLE_WRITEBACK	0x4
1144
parse_page_index(char * val,unsigned long nr_pages,unsigned long * lo,unsigned long * hi)1145 static int parse_page_index(char *val, unsigned long nr_pages,
1146 unsigned long *lo, unsigned long *hi)
1147 {
1148 int ret;
1149
1150 ret = kstrtoul(val, 10, lo);
1151 if (ret)
1152 return ret;
1153 if (*lo >= nr_pages)
1154 return -ERANGE;
1155 *hi = *lo + 1;
1156 return 0;
1157 }
1158
parse_page_indexes(char * val,unsigned long nr_pages,unsigned long * lo,unsigned long * hi)1159 static int parse_page_indexes(char *val, unsigned long nr_pages,
1160 unsigned long *lo, unsigned long *hi)
1161 {
1162 char *delim;
1163 int ret;
1164
1165 delim = strchr(val, '-');
1166 if (!delim)
1167 return -EINVAL;
1168
1169 *delim = 0x00;
1170 ret = kstrtoul(val, 10, lo);
1171 if (ret)
1172 return ret;
1173 if (*lo >= nr_pages)
1174 return -ERANGE;
1175
1176 ret = kstrtoul(delim + 1, 10, hi);
1177 if (ret)
1178 return ret;
1179 if (*hi >= nr_pages || *lo > *hi)
1180 return -ERANGE;
1181 *hi += 1;
1182 return 0;
1183 }
1184
/*
 * Translate a writeback mode name into *_WRITEBACK bits.
 *
 * Recognized names are "idle", "huge", "huge_idle" and "incompressible".
 * *@mode is always written; it is 0 when @val is not recognized.
 *
 * Returns 0 on success or -EINVAL for an unknown name.
 */
static int parse_mode(char *val, u32 *mode)
{
	u32 bits = 0;

	if (!strcmp(val, "idle"))
		bits = IDLE_WRITEBACK;
	else if (!strcmp(val, "huge"))
		bits = HUGE_WRITEBACK;
	else if (!strcmp(val, "huge_idle"))
		bits = IDLE_WRITEBACK | HUGE_WRITEBACK;
	else if (!strcmp(val, "incompressible"))
		bits = INCOMPRESSIBLE_WRITEBACK;

	*mode = bits;
	return bits ? 0 : -EINVAL;
}
1202
/*
 * Walk slots [@lo, @hi) and hand every writeback candidate to
 * place_pp_slot().
 *
 * A slot is a candidate when it is allocated, is neither ZRAM_WB nor
 * ZRAM_SAME, and carries every flag requested through @mode. Each slot is
 * examined with its slot lock held. The scan stops early as soon as
 * place_pp_slot() returns false.
 */
static void scan_slots_for_writeback(struct zram *zram, u32 mode,
				     unsigned long lo, unsigned long hi,
				     struct zram_pp_ctl *ctl)
{
	u32 index;

	for (index = lo; index < hi; index++) {
		bool candidate;
		bool placed = true;

		slot_lock(zram, index);

		candidate = slot_allocated(zram, index) &&
			    !test_slot_flag(zram, index, ZRAM_WB) &&
			    !test_slot_flag(zram, index, ZRAM_SAME);

		/* Each requested mode bit narrows the selection further. */
		if (candidate && (mode & IDLE_WRITEBACK))
			candidate = test_slot_flag(zram, index, ZRAM_IDLE);
		if (candidate && (mode & HUGE_WRITEBACK))
			candidate = test_slot_flag(zram, index, ZRAM_HUGE);
		if (candidate && (mode & INCOMPRESSIBLE_WRITEBACK))
			candidate = test_slot_flag(zram, index,
						   ZRAM_INCOMPRESSIBLE);

		if (candidate)
			placed = place_pp_slot(zram, ctl, index);

		slot_unlock(zram, index);

		if (!placed)
			break;
	}
}
1238
writeback_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)1239 static ssize_t writeback_store(struct device *dev,
1240 struct device_attribute *attr,
1241 const char *buf, size_t len)
1242 {
1243 struct zram *zram = dev_to_zram(dev);
1244 u64 nr_pages = zram->disksize >> PAGE_SHIFT;
1245 unsigned long lo = 0, hi = nr_pages;
1246 struct zram_pp_ctl *pp_ctl = NULL;
1247 struct zram_wb_ctl *wb_ctl = NULL;
1248 char *args, *param, *val;
1249 ssize_t ret = len;
1250 int err, mode = 0;
1251
1252 guard(rwsem_write)(&zram->dev_lock);
1253 if (!init_done(zram))
1254 return -EINVAL;
1255
1256 if (!zram->backing_dev)
1257 return -ENODEV;
1258
1259 pp_ctl = init_pp_ctl();
1260 if (!pp_ctl)
1261 return -ENOMEM;
1262
1263 wb_ctl = init_wb_ctl(zram);
1264 if (!wb_ctl) {
1265 ret = -ENOMEM;
1266 goto out;
1267 }
1268
1269 args = skip_spaces(buf);
1270 while (*args) {
1271 args = next_arg(args, ¶m, &val);
1272
1273 /*
1274 * Workaround to support the old writeback interface.
1275 *
1276 * The old writeback interface has a minor inconsistency and
1277 * requires key=value only for page_index parameter, while the
1278 * writeback mode is a valueless parameter.
1279 *
1280 * This is not the case anymore and now all parameters are
1281 * required to have values, however, we need to support the
1282 * legacy writeback interface format so we check if we can
1283 * recognize a valueless parameter as the (legacy) writeback
1284 * mode.
1285 */
1286 if (!val || !*val) {
1287 err = parse_mode(param, &mode);
1288 if (err) {
1289 ret = err;
1290 goto out;
1291 }
1292
1293 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1294 break;
1295 }
1296
1297 if (!strcmp(param, "type")) {
1298 err = parse_mode(val, &mode);
1299 if (err) {
1300 ret = err;
1301 goto out;
1302 }
1303
1304 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1305 break;
1306 }
1307
1308 if (!strcmp(param, "page_index")) {
1309 err = parse_page_index(val, nr_pages, &lo, &hi);
1310 if (err) {
1311 ret = err;
1312 goto out;
1313 }
1314
1315 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1316 continue;
1317 }
1318
1319 if (!strcmp(param, "page_indexes")) {
1320 err = parse_page_indexes(val, nr_pages, &lo, &hi);
1321 if (err) {
1322 ret = err;
1323 goto out;
1324 }
1325
1326 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1327 continue;
1328 }
1329 }
1330
1331 err = zram_writeback_slots(zram, pp_ctl, wb_ctl);
1332 if (err)
1333 ret = err;
1334
1335 out:
1336 release_pp_ctl(zram, pp_ctl);
1337 release_wb_ctl(wb_ctl);
1338
1339 return ret;
1340 }
1341
/*
 * Decompress, in place, a page that was read back from the backing device.
 *
 * The slot is re-validated under its lock: if it is no longer ZRAM_WB the
 * page content is stale, so the page is zeroed and -EIO is returned.
 * ZRAM_HUGE slots are left untouched and 0 is returned. Otherwise the page
 * content is decompressed into the stream's local_copy buffer and, on
 * success, copied back into @page.
 *
 * Returns 0 on success, -EIO for a stale slot, or the zcomp_decompress()
 * error.
 */
static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned int size;
	int ret = 0, prio;
	void *data;

	slot_lock(zram, index);

	/* Since slot was unlocked we need to make sure it's still ZRAM_WB */
	if (!test_slot_flag(zram, index, ZRAM_WB)) {
		slot_unlock(zram, index);
		/* We read some stale data, zero it out */
		memset_page(page, 0, 0, PAGE_SIZE);
		return -EIO;
	}

	if (test_slot_flag(zram, index, ZRAM_HUGE))
		goto out_unlock;

	size = get_slot_size(zram, index);
	prio = get_slot_comp_priority(zram, index);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	data = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, data, size,
			       zstrm->local_copy);
	if (!ret)
		copy_page(data, zstrm->local_copy);
	kunmap_local(data);
	zcomp_stream_put(zstrm);

out_unlock:
	slot_unlock(zram, index);
	return ret;
}
1378
zram_deferred_decompress(struct work_struct * w)1379 static void zram_deferred_decompress(struct work_struct *w)
1380 {
1381 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
1382 struct page *page = bio_first_page_all(req->bio);
1383 struct zram *zram = req->zram;
1384 u32 index = req->index;
1385 int ret;
1386
1387 ret = decompress_bdev_page(zram, page, index);
1388 if (ret)
1389 req->parent->bi_status = BLK_STS_IOERR;
1390
1391 /* Decrement parent's ->remaining */
1392 bio_endio(req->parent);
1393 bio_put(req->bio);
1394 kfree(req);
1395 }
1396
zram_async_read_endio(struct bio * bio)1397 static void zram_async_read_endio(struct bio *bio)
1398 {
1399 struct zram_rb_req *req = bio->bi_private;
1400 struct zram *zram = req->zram;
1401
1402 if (bio->bi_status) {
1403 req->parent->bi_status = bio->bi_status;
1404 bio_endio(req->parent);
1405 bio_put(bio);
1406 kfree(req);
1407 return;
1408 }
1409
1410 /*
1411 * NOTE: zram_async_read_endio() is not exactly right place for this.
1412 * Ideally, we need to do it after ZRAM_WB check, but this requires
1413 * us to use wq path even on systems that don't enable compressed
1414 * writeback, because we cannot take slot-lock in the current context.
1415 *
1416 * Keep the existing behavior for now.
1417 */
1418 if (zram->compressed_wb == false) {
1419 /* No decompression needed, complete the parent IO */
1420 bio_endio(req->parent);
1421 bio_put(bio);
1422 kfree(req);
1423 return;
1424 }
1425
1426 /*
1427 * zram decompression is sleepable, so we need to deffer it to
1428 * a preemptible context.
1429 */
1430 INIT_WORK(&req->work, zram_deferred_decompress);
1431 queue_work(system_highpri_wq, &req->work);
1432 }
1433
/*
 * Submit an asynchronous read of backing device block @blk_idx into @page,
 * chained to @parent.
 *
 * The parent's remaining count is bumped before submission and
 * zram_async_read_endio() is installed as the completion callback.
 *
 * Returns 0 once the bio has been submitted or -ENOMEM on allocation
 * failure.
 */
static int read_from_bdev_async(struct zram *zram, struct page *page,
				u32 index, unsigned long blk_idx,
				struct bio *parent)
{
	struct zram_rb_req *rb;
	struct bio *rd_bio;

	rb = kmalloc_obj(*rb, GFP_NOIO);
	if (!rb)
		return -ENOMEM;

	rd_bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
	if (!rd_bio)
		goto err_free_req;

	/* Everything the completion path needs travels in the request. */
	rb->zram = zram;
	rb->index = index;
	rb->blk_idx = blk_idx;
	rb->bio = rd_bio;
	rb->parent = parent;

	rd_bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
	rd_bio->bi_private = rb;
	rd_bio->bi_end_io = zram_async_read_endio;

	__bio_add_page(rd_bio, page, PAGE_SIZE, 0);
	bio_inc_remaining(parent);
	submit_bio(rd_bio);

	return 0;

err_free_req:
	kfree(rb);
	return -ENOMEM;
}
1467
zram_sync_read(struct work_struct * w)1468 static void zram_sync_read(struct work_struct *w)
1469 {
1470 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
1471 struct bio_vec bv;
1472 struct bio bio;
1473
1474 bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ);
1475 bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
1476 __bio_add_page(&bio, req->page, PAGE_SIZE, 0);
1477 req->error = submit_bio_wait(&bio);
1478 }
1479
1480 /*
1481 * Block layer want one ->submit_bio to be active at a time, so if we use
1482 * chained IO with parent IO in same context, it's a deadlock. To avoid that,
1483 * use a worker thread context.
1484 */
read_from_bdev_sync(struct zram * zram,struct page * page,u32 index,unsigned long blk_idx)1485 static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index,
1486 unsigned long blk_idx)
1487 {
1488 struct zram_rb_req req;
1489
1490 req.page = page;
1491 req.zram = zram;
1492 req.blk_idx = blk_idx;
1493
1494 INIT_WORK_ONSTACK(&req.work, zram_sync_read);
1495 queue_work(system_dfl_wq, &req.work);
1496 flush_work(&req.work);
1497 destroy_work_on_stack(&req.work);
1498
1499 if (req.error || zram->compressed_wb == false)
1500 return req.error;
1501
1502 return decompress_bdev_page(zram, page, index);
1503 }
1504
/*
 * Read backing device block @blk_idx into @page and account the read in
 * stats.bd_reads.
 *
 * With a @parent bio the read is asynchronous and chained to it. Without
 * one the read is synchronous, which is only expected when ZRAM_PARTIAL_IO
 * is enabled; otherwise a one-time warning is emitted and -EIO returned.
 */
static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
			  unsigned long blk_idx, struct bio *parent)
{
	atomic64_inc(&zram->stats.bd_reads);

	if (parent)
		return read_from_bdev_async(zram, page, index, blk_idx, parent);

	if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
		return -EIO;

	return read_from_bdev_sync(zram, page, index, blk_idx);
}
1516 #else
/* No-op stub for the #else branch of the writeback code above. */
static inline void reset_bdev(struct zram *zram)
{
}
/* Stub for the #else branch: every backing device read fails with -EIO. */
static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
			  unsigned long blk_idx, struct bio *parent)
{
	int err = -EIO;

	return err;
}
1523
/* Stub for the #else branch: there is no backing device block to release. */
static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
{
	/* Intentionally empty. */
}
1527 #endif
1528
1529 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1530
1531 static struct dentry *zram_debugfs_root;
1532
zram_debugfs_create(void)1533 static void zram_debugfs_create(void)
1534 {
1535 zram_debugfs_root = debugfs_create_dir("zram", NULL);
1536 }
1537
zram_debugfs_destroy(void)1538 static void zram_debugfs_destroy(void)
1539 {
1540 debugfs_remove_recursive(zram_debugfs_root);
1541 }
1542
read_block_state(struct file * file,char __user * buf,size_t count,loff_t * ppos)1543 static ssize_t read_block_state(struct file *file, char __user *buf,
1544 size_t count, loff_t *ppos)
1545 {
1546 char *kbuf;
1547 ssize_t index, written = 0;
1548 struct zram *zram = file->private_data;
1549 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
1550
1551 kbuf = kvmalloc(count, GFP_KERNEL);
1552 if (!kbuf)
1553 return -ENOMEM;
1554
1555 guard(rwsem_read)(&zram->dev_lock);
1556 if (!init_done(zram)) {
1557 kvfree(kbuf);
1558 return -EINVAL;
1559 }
1560
1561 for (index = *ppos; index < nr_pages; index++) {
1562 int copied;
1563
1564 slot_lock(zram, index);
1565 if (!slot_allocated(zram, index))
1566 goto next;
1567
1568 copied = snprintf(kbuf + written, count,
1569 "%12zd %12u.%06d %c%c%c%c%c%c\n",
1570 index, zram->table[index].attr.ac_time, 0,
1571 test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.',
1572 test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.',
1573 test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
1574 test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
1575 get_slot_comp_priority(zram, index) ? 'r' : '.',
1576 test_slot_flag(zram, index,
1577 ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
1578
1579 if (count <= copied) {
1580 slot_unlock(zram, index);
1581 break;
1582 }
1583 written += copied;
1584 count -= copied;
1585 next:
1586 slot_unlock(zram, index);
1587 *ppos += 1;
1588 }
1589
1590 if (copy_to_user(buf, kbuf, written))
1591 written = -EFAULT;
1592 kvfree(kbuf);
1593
1594 return written;
1595 }
1596
1597 static const struct file_operations proc_zram_block_state_op = {
1598 .open = simple_open,
1599 .read = read_block_state,
1600 .llseek = default_llseek,
1601 };
1602
zram_debugfs_register(struct zram * zram)1603 static void zram_debugfs_register(struct zram *zram)
1604 {
1605 if (!zram_debugfs_root)
1606 return;
1607
1608 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
1609 zram_debugfs_root);
1610 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
1611 zram, &proc_zram_block_state_op);
1612 }
1613
zram_debugfs_unregister(struct zram * zram)1614 static void zram_debugfs_unregister(struct zram *zram)
1615 {
1616 debugfs_remove_recursive(zram->debugfs_dir);
1617 }
1618 #else
/* No-op debugfs stubs for the #else branch of CONFIG_ZRAM_MEMORY_TRACKING. */
static void zram_debugfs_create(void)
{
}

static void zram_debugfs_destroy(void)
{
}

static void zram_debugfs_register(struct zram *zram)
{
}

static void zram_debugfs_unregister(struct zram *zram)
{
}
1623 #endif
1624
1625 /* Only algo parameter given, lookup by algo name */
lookup_algo_priority(struct zram * zram,const char * algo,u32 min_prio)1626 static int lookup_algo_priority(struct zram *zram, const char *algo,
1627 u32 min_prio)
1628 {
1629 s32 prio;
1630
1631 for (prio = min_prio; prio < ZRAM_MAX_COMPS; prio++) {
1632 if (!zram->comp_algs[prio])
1633 continue;
1634
1635 if (!strcmp(zram->comp_algs[prio], algo))
1636 return prio;
1637 }
1638
1639 return -EINVAL;
1640 }
1641
1642 /* Both algo and priority parameters given, validate them */
validate_algo_priority(struct zram * zram,const char * algo,u32 prio)1643 static int validate_algo_priority(struct zram *zram, const char *algo, u32 prio)
1644 {
1645 if (prio >= ZRAM_MAX_COMPS)
1646 return -EINVAL;
1647 /* No algo at given priority */
1648 if (!zram->comp_algs[prio])
1649 return -EINVAL;
1650 /* A different algo at given priority */
1651 if (strcmp(zram->comp_algs[prio], algo))
1652 return -EINVAL;
1653 return 0;
1654 }
1655
/*
 * Record @alg as the compression algorithm name for priority @prio.
 * Only the pointer is stored; the string is not copied.
 */
static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	zram->comp_algs[prio] = alg;
}
1660
/*
 * Set the compression algorithm for priority @prio from the name in @buf.
 *
 * Returns 0 on success, -E2BIG if the name is ZRAM_MAX_ALGO_NAME_SZ bytes or
 * longer, -EINVAL if zcomp_lookup_backend_name() does not know the name, or
 * -EBUSY if the device is already initialized.
 */
static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	const char *backend;

	if (strlen(buf) >= ZRAM_MAX_ALGO_NAME_SZ)
		return -E2BIG;

	backend = zcomp_lookup_backend_name(buf);
	if (!backend)
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, backend);
	return 0;
}
1683
/*
 * Put the compression parameters of priority @prio back to "not set": free
 * the dictionary and clear its pointer and size, and mark level and deflate
 * winbits as ZCOMP_PARAM_NOT_SET.
 */
static void comp_params_reset(struct zram *zram, u32 prio)
{
	struct zcomp_params *p = &zram->params[prio];

	vfree(p->dict);
	p->dict = NULL;
	p->dict_sz = 0;

	p->level = ZCOMP_PARAM_NOT_SET;
	p->deflate.winbits = ZCOMP_PARAM_NOT_SET;
}
1694
/*
 * Replace the compression parameters of priority @prio.
 *
 * The existing parameters are reset first. When @dict_path is given, the
 * dictionary is loaded from that file. @level and the winbits value from
 * @deflate_params are stored as passed.
 *
 * Returns 0 on success or -EINVAL if the dictionary file cannot be read.
 */
static int comp_params_store(struct zram *zram, u32 prio, s32 level,
			     const char *dict_path,
			     struct deflate_params *deflate_params)
{
	struct zcomp_params *p = &zram->params[prio];
	ssize_t dict_sz = 0;

	comp_params_reset(zram, prio);

	if (dict_path) {
		dict_sz = kernel_read_file_from_path(dict_path, 0, &p->dict,
						     INT_MAX, NULL,
						     READING_POLICY);
		if (dict_sz < 0)
			return -EINVAL;
	}

	p->dict_sz = dict_sz;
	p->level = level;
	p->deflate.winbits = deflate_params->winbits;
	return 0;
}
1718
algorithm_params_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)1719 static ssize_t algorithm_params_store(struct device *dev,
1720 struct device_attribute *attr,
1721 const char *buf,
1722 size_t len)
1723 {
1724 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
1725 char *args, *param, *val, *algo = NULL, *dict_path = NULL;
1726 struct deflate_params deflate_params;
1727 struct zram *zram = dev_to_zram(dev);
1728 bool prio_param = false;
1729 int ret;
1730
1731 deflate_params.winbits = ZCOMP_PARAM_NOT_SET;
1732
1733 args = skip_spaces(buf);
1734 while (*args) {
1735 args = next_arg(args, ¶m, &val);
1736
1737 if (!val || !*val)
1738 return -EINVAL;
1739
1740 if (!strcmp(param, "priority")) {
1741 prio_param = true;
1742 ret = kstrtoint(val, 10, &prio);
1743 if (ret)
1744 return ret;
1745 continue;
1746 }
1747
1748 if (!strcmp(param, "level")) {
1749 ret = kstrtoint(val, 10, &level);
1750 if (ret)
1751 return ret;
1752 continue;
1753 }
1754
1755 if (!strcmp(param, "algo")) {
1756 algo = val;
1757 continue;
1758 }
1759
1760 if (!strcmp(param, "dict")) {
1761 dict_path = val;
1762 continue;
1763 }
1764
1765 if (!strcmp(param, "deflate.winbits")) {
1766 ret = kstrtoint(val, 10, &deflate_params.winbits);
1767 if (ret)
1768 return ret;
1769 continue;
1770 }
1771 }
1772
1773 guard(rwsem_write)(&zram->dev_lock);
1774 if (init_done(zram))
1775 return -EBUSY;
1776
1777 if (prio_param) {
1778 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
1779 return -EINVAL;
1780 }
1781
1782 if (algo && prio_param) {
1783 ret = validate_algo_priority(zram, algo, prio);
1784 if (ret)
1785 return ret;
1786 }
1787
1788 if (algo && !prio_param) {
1789 prio = lookup_algo_priority(zram, algo, ZRAM_PRIMARY_COMP);
1790 if (prio < 0)
1791 return -EINVAL;
1792 }
1793
1794 ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
1795 return ret ? ret : len;
1796 }
1797
comp_algorithm_show(struct device * dev,struct device_attribute * attr,char * buf)1798 static ssize_t comp_algorithm_show(struct device *dev,
1799 struct device_attribute *attr,
1800 char *buf)
1801 {
1802 struct zram *zram = dev_to_zram(dev);
1803 ssize_t sz;
1804
1805 guard(rwsem_read)(&zram->dev_lock);
1806 sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
1807 return sz;
1808 }
1809
comp_algorithm_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)1810 static ssize_t comp_algorithm_store(struct device *dev,
1811 struct device_attribute *attr,
1812 const char *buf,
1813 size_t len)
1814 {
1815 struct zram *zram = dev_to_zram(dev);
1816 int ret;
1817
1818 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
1819 return ret ? ret : len;
1820 }
1821
1822 #ifdef CONFIG_ZRAM_MULTI_COMP
recomp_algorithm_show(struct device * dev,struct device_attribute * attr,char * buf)1823 static ssize_t recomp_algorithm_show(struct device *dev,
1824 struct device_attribute *attr,
1825 char *buf)
1826 {
1827 struct zram *zram = dev_to_zram(dev);
1828 ssize_t sz = 0;
1829 u32 prio;
1830
1831 guard(rwsem_read)(&zram->dev_lock);
1832 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
1833 if (!zram->comp_algs[prio])
1834 continue;
1835
1836 sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
1837 sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
1838 }
1839 return sz;
1840 }
1841
recomp_algorithm_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)1842 static ssize_t recomp_algorithm_store(struct device *dev,
1843 struct device_attribute *attr,
1844 const char *buf,
1845 size_t len)
1846 {
1847 struct zram *zram = dev_to_zram(dev);
1848 int prio = ZRAM_SECONDARY_COMP;
1849 char *args, *param, *val;
1850 char *alg = NULL;
1851 int ret;
1852
1853 args = skip_spaces(buf);
1854 while (*args) {
1855 args = next_arg(args, ¶m, &val);
1856
1857 if (!val || !*val)
1858 return -EINVAL;
1859
1860 if (!strcmp(param, "algo")) {
1861 alg = val;
1862 continue;
1863 }
1864
1865 if (!strcmp(param, "priority")) {
1866 ret = kstrtoint(val, 10, &prio);
1867 if (ret)
1868 return ret;
1869 continue;
1870 }
1871 }
1872
1873 if (!alg)
1874 return -EINVAL;
1875
1876 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
1877 return -EINVAL;
1878
1879 ret = __comp_algorithm_store(zram, prio, alg);
1880 return ret ? ret : len;
1881 }
1882 #endif
1883
compact_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)1884 static ssize_t compact_store(struct device *dev, struct device_attribute *attr,
1885 const char *buf, size_t len)
1886 {
1887 struct zram *zram = dev_to_zram(dev);
1888
1889 guard(rwsem_read)(&zram->dev_lock);
1890 if (!init_done(zram))
1891 return -EINVAL;
1892
1893 zs_compact(zram->mem_pool);
1894
1895 return len;
1896 }
1897
io_stat_show(struct device * dev,struct device_attribute * attr,char * buf)1898 static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr,
1899 char *buf)
1900 {
1901 struct zram *zram = dev_to_zram(dev);
1902 ssize_t ret;
1903
1904 guard(rwsem_read)(&zram->dev_lock);
1905 ret = sysfs_emit(buf,
1906 "%8llu %8llu 0 %8llu\n",
1907 (u64)atomic64_read(&zram->stats.failed_reads),
1908 (u64)atomic64_read(&zram->stats.failed_writes),
1909 (u64)atomic64_read(&zram->stats.notify_free));
1910
1911 return ret;
1912 }
1913
mm_stat_show(struct device * dev,struct device_attribute * attr,char * buf)1914 static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr,
1915 char *buf)
1916 {
1917 struct zram *zram = dev_to_zram(dev);
1918 struct zs_pool_stats pool_stats;
1919 u64 orig_size, mem_used = 0;
1920 long max_used;
1921 ssize_t ret;
1922
1923 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1924
1925 guard(rwsem_read)(&zram->dev_lock);
1926 if (init_done(zram)) {
1927 mem_used = zs_get_total_pages(zram->mem_pool);
1928 zs_pool_stats(zram->mem_pool, &pool_stats);
1929 }
1930
1931 orig_size = atomic64_read(&zram->stats.pages_stored);
1932 max_used = atomic_long_read(&zram->stats.max_used_pages);
1933
1934 ret = sysfs_emit(buf,
1935 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1936 orig_size << PAGE_SHIFT,
1937 (u64)atomic64_read(&zram->stats.compr_data_size),
1938 mem_used << PAGE_SHIFT,
1939 zram->limit_pages << PAGE_SHIFT,
1940 max_used << PAGE_SHIFT,
1941 (u64)atomic64_read(&zram->stats.same_pages),
1942 atomic_long_read(&pool_stats.pages_compacted),
1943 (u64)atomic64_read(&zram->stats.huge_pages),
1944 (u64)atomic64_read(&zram->stats.huge_pages_since));
1945
1946 return ret;
1947 }
1948
debug_stat_show(struct device * dev,struct device_attribute * attr,char * buf)1949 static ssize_t debug_stat_show(struct device *dev,
1950 struct device_attribute *attr, char *buf)
1951 {
1952 int version = 1;
1953 struct zram *zram = dev_to_zram(dev);
1954 ssize_t ret;
1955
1956 guard(rwsem_read)(&zram->dev_lock);
1957 ret = sysfs_emit(buf,
1958 "version: %d\n0 %8llu\n",
1959 version,
1960 (u64)atomic64_read(&zram->stats.miss_free));
1961
1962 return ret;
1963 }
1964
/*
 * Tear down the per-device metadata: free every slot covered by @disksize,
 * destroy the memory pool, then free the slot table and clear its pointer.
 * Does nothing when there is no table.
 */
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t nr_slots, i;

	if (!zram->table)
		return;

	/* Free all pages that are still in this zram device */
	nr_slots = disksize >> PAGE_SHIFT;
	for (i = 0; i < nr_slots; i++)
		slot_free(zram, i);

	zs_destroy_pool(zram->mem_pool);

	vfree(zram->table);
	zram->table = NULL;
}
1981
zram_meta_alloc(struct zram * zram,u64 disksize)1982 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1983 {
1984 size_t num_pages, index;
1985
1986 num_pages = disksize >> PAGE_SHIFT;
1987 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1988 if (!zram->table)
1989 return false;
1990
1991 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1992 if (!zram->mem_pool) {
1993 vfree(zram->table);
1994 zram->table = NULL;
1995 return false;
1996 }
1997
1998 if (!huge_class_size)
1999 huge_class_size = zs_huge_class_size(zram->mem_pool);
2000
2001 for (index = 0; index < num_pages; index++)
2002 slot_lock_init(zram, index);
2003
2004 return true;
2005 }
2006
/*
 * Release whatever slot @index currently holds and reset its metadata.
 *
 * Per-slot flags and the compression priority are cleared first. After
 * that the slot is treated as exactly one of:
 *   - written back: the backing device block is released;
 *   - same-filled:  only the flag and the same_pages counter are updated;
 *   - pool object:  the object is freed and compr_data_size reduced.
 * A slot with no handle in the last case is left as is and pages_stored is
 * not touched.
 */
static void slot_free(struct zram *zram, u32 index)
{
	unsigned long obj;

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].attr.ac_time = 0;
#endif

	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
	set_slot_comp_priority(zram, index, 0);

	if (test_slot_flag(zram, index, ZRAM_HUGE)) {
		/*
		 * Writeback completion decrements ->huge_pages but keeps
		 * ZRAM_HUGE flag for deferred decompression path.
		 */
		if (!test_slot_flag(zram, index, ZRAM_WB))
			atomic64_dec(&zram->stats.huge_pages);
		clear_slot_flag(zram, index, ZRAM_HUGE);
	}

	if (test_slot_flag(zram, index, ZRAM_WB)) {
		clear_slot_flag(zram, index, ZRAM_WB);
		zram_release_bdev_block(zram, get_slot_handle(zram, index));
	} else if (test_slot_flag(zram, index, ZRAM_SAME)) {
		/*
		 * No memory is allocated for same element filled pages.
		 * Simply clear same page flag.
		 */
		clear_slot_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
	} else {
		obj = get_slot_handle(zram, index);
		if (!obj)
			return;

		zs_free(zram->mem_pool, obj);
		atomic64_sub(get_slot_size(zram, index),
			     &zram->stats.compr_data_size);
	}

	atomic64_dec(&zram->stats.pages_stored);
	set_slot_handle(zram, index, 0);
	set_slot_size(zram, index, 0);
}
2059
/*
 * Fill @page using the value stored in the handle of slot @index.
 * Always returns 0.
 */
static int read_same_filled_page(struct zram *zram, struct page *page,
				 u32 index)
{
	void *dst = kmap_local_page(page);

	zram_fill_page(dst, PAGE_SIZE, get_slot_handle(zram, index));
	kunmap_local(dst);

	return 0;
}
2070
/*
 * Copy a full, uncompressed page from the pool object of slot @index into
 * @page. Always returns 0.
 */
static int read_incompressible_page(struct zram *zram, struct page *page,
				    u32 index)
{
	unsigned long obj = get_slot_handle(zram, index);
	void *from, *to;

	from = zs_obj_read_begin(zram->mem_pool, obj, PAGE_SIZE, NULL);
	to = kmap_local_page(page);
	copy_page(to, from);
	kunmap_local(to);
	zs_obj_read_end(zram->mem_pool, obj, PAGE_SIZE, from);

	return 0;
}
2086
/*
 * Decompress the pool object of slot @index into @page, using the
 * compressor selected by the slot's compression priority.
 *
 * Returns the zcomp_decompress() result.
 */
static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
{
	unsigned long obj = get_slot_handle(zram, index);
	unsigned int len = get_slot_size(zram, index);
	int prio = get_slot_comp_priority(zram, index);
	struct zcomp_strm *zstrm;
	void *from, *to;
	int ret;

	zstrm = zcomp_stream_get(zram->comps[prio]);
	from = zs_obj_read_begin(zram->mem_pool, obj, len, zstrm->local_copy);
	to = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, from, len, to);
	kunmap_local(to);
	zs_obj_read_end(zram->mem_pool, obj, len, from);
	zcomp_stream_put(zstrm);

	return ret;
}
2110
2111 #if defined CONFIG_ZRAM_WRITEBACK
/*
 * Copy the raw (still compressed) pool object of slot @index to the start
 * of @page. Always returns 0.
 */
static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
{
	unsigned long obj = get_slot_handle(zram, index);
	unsigned int len = get_slot_size(zram, index);
	struct zcomp_strm *zstrm;
	void *raw;

	/*
	 * We need to get stream just for ->local_copy buffer, in
	 * case if object spans two physical pages. No decompression
	 * takes place here, as we read raw compressed data.
	 */
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	raw = zs_obj_read_begin(zram->mem_pool, obj, len, zstrm->local_copy);
	memcpy_to_page(page, 0, raw, len);
	zs_obj_read_end(zram->mem_pool, obj, len, raw);
	zcomp_stream_put(zstrm);

	return 0;
}
2136 #endif
2137
2138 /*
2139 * Reads (decompresses if needed) a page from zspool (zsmalloc).
2140 * Corresponding ZRAM slot should be locked.
2141 */
read_from_zspool(struct zram * zram,struct page * page,u32 index)2142 static int read_from_zspool(struct zram *zram, struct page *page, u32 index)
2143 {
2144 if (test_slot_flag(zram, index, ZRAM_SAME) ||
2145 !get_slot_handle(zram, index))
2146 return read_same_filled_page(zram, page, index);
2147
2148 if (!test_slot_flag(zram, index, ZRAM_HUGE))
2149 return read_compressed_page(zram, page, index);
2150 else
2151 return read_incompressible_page(zram, page, index);
2152 }
2153
/*
 * Read slot @index into @page, either from the memory pool or, for ZRAM_WB
 * slots, from the backing device (@parent is passed on to
 * read_from_bdev()).
 *
 * Returns the result of the underlying read; a negative result is also
 * warned about and logged.
 */
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	slot_lock(zram, index);
	if (test_slot_flag(zram, index, ZRAM_WB)) {
		unsigned long blk_idx = get_slot_handle(zram, index);

		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		slot_unlock(zram, index);
		ret = read_from_bdev(zram, page, index, blk_idx, parent);
	} else {
		/* Slot should be locked through out the function call */
		ret = read_from_zspool(zram, page, index);
		slot_unlock(zram, index);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
2181
2182 /*
2183 * Use a temporary buffer to decompress the page, as the decompressor
2184 * always expects a full page for the output.
2185 */
zram_bvec_read_partial(struct zram * zram,struct bio_vec * bvec,u32 index,int offset)2186 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
2187 u32 index, int offset)
2188 {
2189 struct page *page = alloc_page(GFP_NOIO);
2190 int ret;
2191
2192 if (!page)
2193 return -ENOMEM;
2194 ret = zram_read_page(zram, page, index, NULL);
2195 if (likely(!ret))
2196 memcpy_to_bvec(bvec, page_address(page) + offset);
2197 __free_page(page);
2198 return ret;
2199 }
2200
/*
 * Read one bio_vec worth of data from slot @index. Full-page requests are
 * read straight into the bvec page; partial ones go through a bounce page.
 */
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (!is_partial_io(bvec))
		return zram_read_page(zram, bvec->bv_page, index, bio);

	return zram_bvec_read_partial(zram, bvec, index, offset);
}
2208
/*
 * Record slot @index as a same-filled page. No object is allocated: the
 * fill pattern @fill is stored in the slot's handle field and the slot is
 * flagged ZRAM_SAME. Always returns 0.
 */
static int write_same_filled_page(struct zram *zram, unsigned long fill,
				  u32 index)
{
	/* Release whatever the slot held before and install the new state */
	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_SAME);
	set_slot_handle(zram, index, fill);
	slot_unlock(zram, index);

	/* Statistics are updated outside the slot lock */
	atomic64_inc(&zram->stats.same_pages);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}
2223
/*
 * Store @page uncompressed (PAGE_SIZE bytes) in the zsmalloc pool and mark
 * slot @index as ZRAM_HUGE.
 *
 * Returns 0 on success, the zs_malloc() error on allocation failure, or
 * -ENOMEM when zram_can_store_page() refuses the page.
 */
static int write_incompressible_page(struct zram *zram, struct page *page,
				     u32 index)
{
	unsigned long handle;
	void *kaddr;

	/*
	 * This runs in preemptible context, so a single allocation attempt
	 * is enough: no optimistic attempt followed by a pessimistic
	 * fallback is needed.
	 */
	handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle))
		return PTR_ERR((void *)handle);

	/* Give the fresh object back if the page cannot be stored */
	if (!zram_can_store_page(zram)) {
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	kaddr = kmap_local_page(page);
	zs_obj_write(zram->mem_pool, handle, kaddr, PAGE_SIZE);
	kunmap_local(kaddr);

	/* Swap the slot over to the new object under the slot lock */
	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_HUGE);
	set_slot_handle(zram, index, handle);
	set_slot_size(zram, index, PAGE_SIZE);
	slot_unlock(zram, index);

	atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.huge_pages);
	atomic64_inc(&zram->stats.huge_pages_since);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}
2264
/*
 * Store @page into slot @index.
 *
 * Same-filled pages are recorded without allocating an object. Pages whose
 * compressed length reaches huge_class_size are handed over to
 * write_incompressible_page(). Everything else is compressed with the
 * primary algorithm and written to the zsmalloc pool.
 *
 * Returns 0 on success, the compression or allocation error code, or
 * -ENOMEM when zram_can_store_page() refuses the page.
 */
static int zram_write_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *strm;
	unsigned long fill, handle;
	unsigned int clen;
	bool uniform;
	void *kaddr;
	int err;

	kaddr = kmap_local_page(page);
	uniform = page_same_filled(kaddr, &fill);
	kunmap_local(kaddr);
	if (uniform)
		return write_same_filled_page(zram, fill, index);

	strm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	kaddr = kmap_local_page(page);
	err = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], strm,
			     kaddr, &clen);
	kunmap_local(kaddr);

	if (unlikely(err)) {
		zcomp_stream_put(strm);
		pr_err("Compression failed! err=%d\n", err);
		return err;
	}

	/* Too large once compressed: store the page as-is instead */
	if (clen >= huge_class_size) {
		zcomp_stream_put(strm);
		return write_incompressible_page(zram, page, index);
	}

	handle = zs_malloc(zram->mem_pool, clen,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle)) {
		zcomp_stream_put(strm);
		return PTR_ERR((void *)handle);
	}

	if (!zram_can_store_page(zram)) {
		zcomp_stream_put(strm);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	/* The compressed data lives in the stream buffer until copied out */
	zs_obj_write(zram->mem_pool, handle, strm->buffer, clen);
	zcomp_stream_put(strm);

	/* Replace the slot's previous contents under the slot lock */
	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_handle(zram, index, handle);
	set_slot_size(zram, index, clen);
	slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	atomic64_add(clen, &zram->stats.compr_data_size);

	return 0;
}
2327
2328 /*
2329 * This is a partial IO. Read the full page before writing the changes.
2330 */
zram_bvec_write_partial(struct zram * zram,struct bio_vec * bvec,u32 index,int offset,struct bio * bio)2331 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
2332 u32 index, int offset, struct bio *bio)
2333 {
2334 struct page *page = alloc_page(GFP_NOIO);
2335 int ret;
2336
2337 if (!page)
2338 return -ENOMEM;
2339
2340 ret = zram_read_page(zram, page, index, bio);
2341 if (!ret) {
2342 memcpy_from_bvec(page_address(page) + offset, bvec);
2343 ret = zram_write_page(zram, page, index);
2344 }
2345 __free_page(page);
2346 return ret;
2347 }
2348
/*
 * Write one bio_vec worth of data to slot @index. Full-page requests are
 * stored directly; partial ones take the read-modify-write path.
 */
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
			   u32 index, int offset, struct bio *bio)
{
	if (!is_partial_io(bvec))
		return zram_write_page(zram, bvec->bv_page, index);

	return zram_bvec_write_partial(zram, bvec, index, offset, bio);
}
2356
2357 #ifdef CONFIG_ZRAM_MULTI_COMP
/*
 * Recompression mode bits, selected with the "type=" parameter of the
 * recompress attribute: restrict the scan to slots flagged ZRAM_IDLE
 * and/or ZRAM_HUGE.
 */
#define RECOMPRESS_IDLE		(1 << 0)
#define RECOMPRESS_HUGE		(1 << 1)
2360
highest_priority_algorithm(struct zram * zram,u32 prio)2361 static bool highest_priority_algorithm(struct zram *zram, u32 prio)
2362 {
2363 u32 p;
2364
2365 for (p = prio + 1; p < ZRAM_MAX_COMPS; p++) {
2366 if (zram->comp_algs[p])
2367 return false;
2368 }
2369
2370 return true;
2371 }
2372
scan_slots_for_recompress(struct zram * zram,u32 mode,u32 prio,struct zram_pp_ctl * ctl)2373 static void scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio,
2374 struct zram_pp_ctl *ctl)
2375 {
2376 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
2377 unsigned long index;
2378
2379 for (index = 0; index < nr_pages; index++) {
2380 bool ok = true;
2381
2382 slot_lock(zram, index);
2383 if (!slot_allocated(zram, index))
2384 goto next;
2385
2386 if (mode & RECOMPRESS_IDLE &&
2387 !test_slot_flag(zram, index, ZRAM_IDLE))
2388 goto next;
2389
2390 if (mode & RECOMPRESS_HUGE &&
2391 !test_slot_flag(zram, index, ZRAM_HUGE))
2392 goto next;
2393
2394 if (test_slot_flag(zram, index, ZRAM_WB) ||
2395 test_slot_flag(zram, index, ZRAM_SAME) ||
2396 test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
2397 goto next;
2398
2399 /* Already compressed with same or higher priority */
2400 if (get_slot_comp_priority(zram, index) >= prio)
2401 goto next;
2402
2403 ok = place_pp_slot(zram, ctl, index);
2404 next:
2405 slot_unlock(zram, index);
2406 if (!ok)
2407 break;
2408 }
2409 }
2410
2411 /*
2412 * This function will decompress (unless it's ZRAM_HUGE) the page and then
2413 * attempt to compress it using provided compression algorithm priority
2414 * (which is potentially more effective).
2415 *
2416 * Corresponding ZRAM slot should be locked.
2417 */
recompress_slot(struct zram * zram,u32 index,struct page * page,u64 * num_recomp_pages,u32 threshold,u32 prio)2418 static int recompress_slot(struct zram *zram, u32 index, struct page *page,
2419 u64 *num_recomp_pages, u32 threshold, u32 prio)
2420 {
2421 struct zcomp_strm *zstrm = NULL;
2422 unsigned long handle_old;
2423 unsigned long handle_new;
2424 unsigned int comp_len_old;
2425 unsigned int comp_len_new;
2426 unsigned int class_index_old;
2427 unsigned int class_index_new;
2428 void *src;
2429 int ret = 0;
2430
2431 handle_old = get_slot_handle(zram, index);
2432 if (!handle_old)
2433 return -EINVAL;
2434
2435 comp_len_old = get_slot_size(zram, index);
2436 /*
2437 * Do not recompress objects that are already "small enough".
2438 */
2439 if (comp_len_old < threshold)
2440 return 0;
2441
2442 ret = read_from_zspool(zram, page, index);
2443 if (ret)
2444 return ret;
2445
2446 /*
2447 * We touched this entry so mark it as non-IDLE. This makes sure that
2448 * we don't preserve IDLE flag and don't incorrectly pick this entry
2449 * for different post-processing type (e.g. writeback).
2450 */
2451 clear_slot_flag(zram, index, ZRAM_IDLE);
2452
2453 zstrm = zcomp_stream_get(zram->comps[prio]);
2454 src = kmap_local_page(page);
2455 ret = zcomp_compress(zram->comps[prio], zstrm, src, &comp_len_new);
2456 kunmap_local(src);
2457
2458 /*
2459 * Decrement the limit (if set) on pages we can recompress, even
2460 * when current recompression was unsuccessful or did not compress
2461 * the page below the threshold, because we still spent resources
2462 * on it.
2463 */
2464 if (*num_recomp_pages)
2465 *num_recomp_pages -= 1;
2466
2467 if (ret) {
2468 zcomp_stream_put(zstrm);
2469 return ret;
2470 }
2471
2472 class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
2473 class_index_new = zs_lookup_class_index(zram->mem_pool, comp_len_new);
2474
2475 if (class_index_new >= class_index_old ||
2476 (threshold && comp_len_new >= threshold)) {
2477 zcomp_stream_put(zstrm);
2478
2479 /*
2480 * Secondary algorithms failed to re-compress the page
2481 * in a way that would save memory.
2482 *
2483 * Mark the object incompressible if the max-priority (the
2484 * last configured one) algorithm couldn't re-compress it.
2485 */
2486 if (highest_priority_algorithm(zram, prio))
2487 set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
2488 return 0;
2489 }
2490
2491 /*
2492 * We are holding per-CPU stream mutex and entry lock so better
2493 * avoid direct reclaim. Allocation error is not fatal since
2494 * we still have the old object in the mem_pool.
2495 *
2496 * XXX: technically, the node we really want here is the node that
2497 * holds the original compressed data. But that would require us to
2498 * modify zsmalloc API to return this information. For now, we will
2499 * make do with the node of the page allocated for recompression.
2500 */
2501 handle_new = zs_malloc(zram->mem_pool, comp_len_new,
2502 GFP_NOIO | __GFP_NOWARN |
2503 __GFP_HIGHMEM | __GFP_MOVABLE,
2504 page_to_nid(page));
2505 if (IS_ERR_VALUE(handle_new)) {
2506 zcomp_stream_put(zstrm);
2507 return PTR_ERR((void *)handle_new);
2508 }
2509
2510 zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
2511 zcomp_stream_put(zstrm);
2512
2513 slot_free(zram, index);
2514 set_slot_handle(zram, index, handle_new);
2515 set_slot_size(zram, index, comp_len_new);
2516 set_slot_comp_priority(zram, index, prio);
2517
2518 atomic64_add(comp_len_new, &zram->stats.compr_data_size);
2519 atomic64_inc(&zram->stats.pages_stored);
2520
2521 return 0;
2522 }
2523
recompress_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)2524 static ssize_t recompress_store(struct device *dev,
2525 struct device_attribute *attr,
2526 const char *buf, size_t len)
2527 {
2528 struct zram *zram = dev_to_zram(dev);
2529 char *args, *param, *val, *algo = NULL;
2530 u64 num_recomp_pages = ULLONG_MAX;
2531 struct zram_pp_ctl *ctl = NULL;
2532 s32 prio = ZRAM_SECONDARY_COMP;
2533 u32 mode = 0, threshold = 0;
2534 struct zram_pp_slot *pps;
2535 struct page *page = NULL;
2536 bool prio_param = false;
2537 ssize_t ret;
2538
2539 args = skip_spaces(buf);
2540 while (*args) {
2541 args = next_arg(args, ¶m, &val);
2542
2543 if (!val || !*val)
2544 return -EINVAL;
2545
2546 if (!strcmp(param, "type")) {
2547 if (!strcmp(val, "idle"))
2548 mode = RECOMPRESS_IDLE;
2549 if (!strcmp(val, "huge"))
2550 mode = RECOMPRESS_HUGE;
2551 if (!strcmp(val, "huge_idle"))
2552 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
2553 if (!mode)
2554 return -EINVAL;
2555 continue;
2556 }
2557
2558 if (!strcmp(param, "max_pages")) {
2559 /*
2560 * Limit the number of entries (pages) we attempt to
2561 * recompress.
2562 */
2563 ret = kstrtoull(val, 10, &num_recomp_pages);
2564 if (ret)
2565 return ret;
2566 continue;
2567 }
2568
2569 if (!strcmp(param, "threshold")) {
2570 /*
2571 * We will re-compress only idle objects equal or
2572 * greater in size than watermark.
2573 */
2574 ret = kstrtouint(val, 10, &threshold);
2575 if (ret)
2576 return ret;
2577 continue;
2578 }
2579
2580 if (!strcmp(param, "algo")) {
2581 algo = val;
2582 continue;
2583 }
2584
2585 if (!strcmp(param, "priority")) {
2586 prio_param = true;
2587 ret = kstrtoint(val, 10, &prio);
2588 if (ret)
2589 return ret;
2590 continue;
2591 }
2592 }
2593
2594 if (threshold >= huge_class_size)
2595 return -EINVAL;
2596
2597 guard(rwsem_write)(&zram->dev_lock);
2598 if (!init_done(zram))
2599 return -EINVAL;
2600
2601 if (prio_param) {
2602 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
2603 return -EINVAL;
2604 }
2605
2606 if (algo && prio_param) {
2607 ret = validate_algo_priority(zram, algo, prio);
2608 if (ret)
2609 return ret;
2610 }
2611
2612 if (algo && !prio_param) {
2613 prio = lookup_algo_priority(zram, algo, ZRAM_SECONDARY_COMP);
2614 if (prio < 0)
2615 return -EINVAL;
2616 }
2617
2618 if (!zram->comps[prio])
2619 return -EINVAL;
2620
2621 page = alloc_page(GFP_KERNEL);
2622 if (!page) {
2623 ret = -ENOMEM;
2624 goto out;
2625 }
2626
2627 ctl = init_pp_ctl();
2628 if (!ctl) {
2629 ret = -ENOMEM;
2630 goto out;
2631 }
2632
2633 scan_slots_for_recompress(zram, mode, prio, ctl);
2634
2635 ret = len;
2636 while ((pps = select_pp_slot(ctl))) {
2637 int err = 0;
2638
2639 if (!num_recomp_pages)
2640 break;
2641
2642 slot_lock(zram, pps->index);
2643 if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT))
2644 goto next;
2645
2646 err = recompress_slot(zram, pps->index, page,
2647 &num_recomp_pages, threshold, prio);
2648 next:
2649 slot_unlock(zram, pps->index);
2650 release_pp_slot(zram, pps);
2651
2652 if (err) {
2653 ret = err;
2654 break;
2655 }
2656
2657 cond_resched();
2658 }
2659
2660 out:
2661 if (page)
2662 __free_page(page);
2663 release_pp_ctl(zram, ctl);
2664 return ret;
2665 }
2666 #endif
2667
zram_bio_discard(struct zram * zram,struct bio * bio)2668 static void zram_bio_discard(struct zram *zram, struct bio *bio)
2669 {
2670 size_t n = bio->bi_iter.bi_size;
2671 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2672 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2673 SECTOR_SHIFT;
2674
2675 /*
2676 * zram manages data in physical block size units. Because logical block
2677 * size isn't identical with physical block size on some arch, we
2678 * could get a discard request pointing to a specific offset within a
2679 * certain physical block. Although we can handle this request by
2680 * reading that physiclal block and decompressing and partially zeroing
2681 * and re-compressing and then re-storing it, this isn't reasonable
2682 * because our intent with a discard request is to save memory. So
2683 * skipping this logical block is appropriate here.
2684 */
2685 if (offset) {
2686 if (n <= (PAGE_SIZE - offset))
2687 goto end_bio;
2688
2689 n -= (PAGE_SIZE - offset);
2690 index++;
2691 }
2692
2693 while (n >= PAGE_SIZE) {
2694 slot_lock(zram, index);
2695 slot_free(zram, index);
2696 slot_unlock(zram, index);
2697 atomic64_inc(&zram->stats.notify_free);
2698 index++;
2699 n -= PAGE_SIZE;
2700 }
2701
2702 end_bio:
2703 bio_endio(bio);
2704 }
2705
zram_bio_read(struct zram * zram,struct bio * bio)2706 static void zram_bio_read(struct zram *zram, struct bio *bio)
2707 {
2708 unsigned long start_time = bio_start_io_acct(bio);
2709 struct bvec_iter iter = bio->bi_iter;
2710
2711 do {
2712 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2713 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2714 SECTOR_SHIFT;
2715 struct bio_vec bv = bio_iter_iovec(bio, iter);
2716
2717 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2718
2719 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
2720 atomic64_inc(&zram->stats.failed_reads);
2721 bio->bi_status = BLK_STS_IOERR;
2722 break;
2723 }
2724 flush_dcache_page(bv.bv_page);
2725
2726 slot_lock(zram, index);
2727 mark_slot_accessed(zram, index);
2728 slot_unlock(zram, index);
2729
2730 bio_advance_iter_single(bio, &iter, bv.bv_len);
2731 } while (iter.bi_size);
2732
2733 bio_end_io_acct(bio, start_time);
2734 bio_endio(bio);
2735 }
2736
zram_bio_write(struct zram * zram,struct bio * bio)2737 static void zram_bio_write(struct zram *zram, struct bio *bio)
2738 {
2739 unsigned long start_time = bio_start_io_acct(bio);
2740 struct bvec_iter iter = bio->bi_iter;
2741
2742 do {
2743 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2744 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2745 SECTOR_SHIFT;
2746 struct bio_vec bv = bio_iter_iovec(bio, iter);
2747
2748 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2749
2750 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
2751 atomic64_inc(&zram->stats.failed_writes);
2752 bio->bi_status = BLK_STS_IOERR;
2753 break;
2754 }
2755
2756 slot_lock(zram, index);
2757 mark_slot_accessed(zram, index);
2758 slot_unlock(zram, index);
2759
2760 bio_advance_iter_single(bio, &iter, bv.bv_len);
2761 } while (iter.bi_size);
2762
2763 bio_end_io_acct(bio, start_time);
2764 bio_endio(bio);
2765 }
2766
2767 /*
2768 * Handler function for all zram I/O requests.
2769 */
zram_submit_bio(struct bio * bio)2770 static void zram_submit_bio(struct bio *bio)
2771 {
2772 struct zram *zram = bio->bi_bdev->bd_disk->private_data;
2773
2774 switch (bio_op(bio)) {
2775 case REQ_OP_READ:
2776 zram_bio_read(zram, bio);
2777 break;
2778 case REQ_OP_WRITE:
2779 zram_bio_write(zram, bio);
2780 break;
2781 case REQ_OP_DISCARD:
2782 case REQ_OP_WRITE_ZEROES:
2783 zram_bio_discard(zram, bio);
2784 break;
2785 default:
2786 WARN_ON_ONCE(1);
2787 bio_endio(bio);
2788 }
2789 }
2790
zram_slot_free_notify(struct block_device * bdev,unsigned long index)2791 static void zram_slot_free_notify(struct block_device *bdev,
2792 unsigned long index)
2793 {
2794 struct zram *zram;
2795
2796 zram = bdev->bd_disk->private_data;
2797
2798 atomic64_inc(&zram->stats.notify_free);
2799 if (!slot_trylock(zram, index)) {
2800 atomic64_inc(&zram->stats.miss_free);
2801 return;
2802 }
2803
2804 slot_free(zram, index);
2805 slot_unlock(zram, index);
2806 }
2807
zram_comp_params_reset(struct zram * zram)2808 static void zram_comp_params_reset(struct zram *zram)
2809 {
2810 u32 prio;
2811
2812 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2813 comp_params_reset(zram, prio);
2814 }
2815 }
2816
zram_destroy_comps(struct zram * zram)2817 static void zram_destroy_comps(struct zram *zram)
2818 {
2819 u32 prio;
2820
2821 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2822 struct zcomp *comp = zram->comps[prio];
2823
2824 zram->comps[prio] = NULL;
2825 if (!comp)
2826 continue;
2827 zcomp_destroy(comp);
2828 }
2829
2830 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++)
2831 zram->comp_algs[prio] = NULL;
2832
2833 zram_comp_params_reset(zram);
2834 }
2835
/*
 * Return the device to its un-initialized state: zero capacity, no slot
 * table, no compression backends, cleared statistics, backing device reset,
 * and the default compressor selected again as the primary algorithm.
 */
static void zram_reset_device(struct zram *zram)
{
	/* dev_lock is held for write until this function returns */
	guard(rwsem_write)(&zram->dev_lock);

	zram->limit_pages = 0;

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	/* I/O operation under all of CPU are done so let's free */
	zram_meta_free(zram, zram->disksize);
	zram->disksize = 0;
	zram_destroy_comps(zram);
	memset(&zram->stats, 0, sizeof(zram->stats));
	reset_bdev(zram);

	/* zram_destroy_comps() cleared the algorithm names; restore default */
	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
}
2854
disksize_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)2855 static ssize_t disksize_store(struct device *dev, struct device_attribute *attr,
2856 const char *buf, size_t len)
2857 {
2858 u64 disksize;
2859 struct zcomp *comp;
2860 struct zram *zram = dev_to_zram(dev);
2861 int err;
2862 u32 prio;
2863
2864 disksize = memparse(buf, NULL);
2865 if (!disksize)
2866 return -EINVAL;
2867
2868 guard(rwsem_write)(&zram->dev_lock);
2869 if (init_done(zram)) {
2870 pr_info("Cannot change disksize for initialized device\n");
2871 return -EBUSY;
2872 }
2873
2874 disksize = PAGE_ALIGN(disksize);
2875 if (!zram_meta_alloc(zram, disksize))
2876 return -ENOMEM;
2877
2878 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2879 if (!zram->comp_algs[prio])
2880 continue;
2881
2882 comp = zcomp_create(zram->comp_algs[prio],
2883 &zram->params[prio]);
2884 if (IS_ERR(comp)) {
2885 pr_err("Cannot initialise %s compressing backend\n",
2886 zram->comp_algs[prio]);
2887 err = PTR_ERR(comp);
2888 goto out_free_comps;
2889 }
2890
2891 zram->comps[prio] = comp;
2892 }
2893 zram->disksize = disksize;
2894 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
2895
2896 return len;
2897
2898 out_free_comps:
2899 zram_destroy_comps(zram);
2900 zram_meta_free(zram, disksize);
2901 return err;
2902 }
2903
reset_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)2904 static ssize_t reset_store(struct device *dev,
2905 struct device_attribute *attr, const char *buf, size_t len)
2906 {
2907 int ret;
2908 unsigned short do_reset;
2909 struct zram *zram;
2910 struct gendisk *disk;
2911
2912 ret = kstrtou16(buf, 10, &do_reset);
2913 if (ret)
2914 return ret;
2915
2916 if (!do_reset)
2917 return -EINVAL;
2918
2919 zram = dev_to_zram(dev);
2920 disk = zram->disk;
2921
2922 mutex_lock(&disk->open_mutex);
2923 /* Do not reset an active device or claimed device */
2924 if (disk_openers(disk) || zram->claim) {
2925 mutex_unlock(&disk->open_mutex);
2926 return -EBUSY;
2927 }
2928
2929 /* From now on, anyone can't open /dev/zram[0-9] */
2930 zram->claim = true;
2931 mutex_unlock(&disk->open_mutex);
2932
2933 /* Make sure all the pending I/O are finished */
2934 sync_blockdev(disk->part0);
2935 zram_reset_device(zram);
2936
2937 mutex_lock(&disk->open_mutex);
2938 zram->claim = false;
2939 mutex_unlock(&disk->open_mutex);
2940
2941 return len;
2942 }
2943
/*
 * ->open() handler. Expects disk->open_mutex to be held (warns otherwise).
 * Returns -EBUSY while the device is claimed, e.g. for a reset, and 0
 * otherwise.
 */
static int zram_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zram *zram = disk->private_data;

	WARN_ON(!mutex_is_locked(&disk->open_mutex));

	/* A claimed device is about to be reset, so the open request fails */
	return zram->claim ? -EBUSY : 0;
}
2955
/*
 * Block device operations of a zram disk: open is refused while the device
 * is claimed, all I/O goes through zram_submit_bio(), and swap slot free
 * notifications release the matching zram slot.
 */
static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};
2962
/*
 * Per-device sysfs attributes. Each line defines a dev_attr_<name> object;
 * the _RO/_WO/_RW suffix selects whether it is readable, writable or both.
 * The writeback and recompression attributes exist only when the matching
 * config option is enabled.
 */
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
static DEVICE_ATTR_RW(writeback_batch_size);
static DEVICE_ATTR_RW(compressed_writeback);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
static DEVICE_ATTR_WO(recompress);
#endif
static DEVICE_ATTR_WO(algorithm_params);
2988
/*
 * NULL-terminated list of the per-device attributes. ATTRIBUTE_GROUPS()
 * below turns it into zram_disk_groups, which zram_add() hands to
 * device_add_disk().
 */
static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
	&dev_attr_writeback_batch_size.attr,
	&dev_attr_compressed_writeback.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
	&dev_attr_recomp_algorithm.attr,
	&dev_attr_recompress.attr,
#endif
	&dev_attr_algorithm_params.attr,
	NULL,
};

ATTRIBUTE_GROUPS(zram_disk);
3019
3020 /*
3021 * Allocate and initialize new zram device. the function returns
3022 * '>= 0' device_id upon success, and negative value otherwise.
3023 */
zram_add(void)3024 static int zram_add(void)
3025 {
3026 struct queue_limits lim = {
3027 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
3028 /*
3029 * To ensure that we always get PAGE_SIZE aligned and
3030 * n*PAGE_SIZED sized I/O requests.
3031 */
3032 .physical_block_size = PAGE_SIZE,
3033 .io_min = PAGE_SIZE,
3034 .io_opt = PAGE_SIZE,
3035 .max_hw_discard_sectors = UINT_MAX,
3036 /*
3037 * zram_bio_discard() will clear all logical blocks if logical
3038 * block size is identical with physical block size(PAGE_SIZE).
3039 * But if it is different, we will skip discarding some parts of
3040 * logical blocks in the part of the request range which isn't
3041 * aligned to physical block size. So we can't ensure that all
3042 * discarded logical blocks are zeroed.
3043 */
3044 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
3045 .max_write_zeroes_sectors = UINT_MAX,
3046 #endif
3047 .features = BLK_FEAT_STABLE_WRITES |
3048 BLK_FEAT_SYNCHRONOUS,
3049 };
3050 struct zram *zram;
3051 int ret, device_id;
3052
3053 zram = kzalloc_obj(struct zram);
3054 if (!zram)
3055 return -ENOMEM;
3056
3057 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
3058 if (ret < 0)
3059 goto out_free_dev;
3060 device_id = ret;
3061
3062 init_rwsem(&zram->dev_lock);
3063 #ifdef CONFIG_ZRAM_WRITEBACK
3064 zram->wb_batch_size = 32;
3065 zram->compressed_wb = false;
3066 #endif
3067
3068 /* gendisk structure */
3069 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
3070 if (IS_ERR(zram->disk)) {
3071 pr_err("Error allocating disk structure for device %d\n",
3072 device_id);
3073 ret = PTR_ERR(zram->disk);
3074 goto out_free_idr;
3075 }
3076
3077 zram->disk->major = zram_major;
3078 zram->disk->first_minor = device_id;
3079 zram->disk->minors = 1;
3080 zram->disk->flags |= GENHD_FL_NO_PART;
3081 zram->disk->fops = &zram_devops;
3082 zram->disk->private_data = zram;
3083 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
3084 zram_comp_params_reset(zram);
3085 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
3086
3087 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */
3088 set_capacity(zram->disk, 0);
3089 ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
3090 if (ret)
3091 goto out_cleanup_disk;
3092
3093 zram_debugfs_register(zram);
3094 pr_info("Added device: %s\n", zram->disk->disk_name);
3095 return device_id;
3096
3097 out_cleanup_disk:
3098 put_disk(zram->disk);
3099 out_free_idr:
3100 idr_remove(&zram_index_idr, device_id);
3101 out_free_dev:
3102 kfree(zram);
3103 return ret;
3104 }
3105
zram_remove(struct zram * zram)3106 static int zram_remove(struct zram *zram)
3107 {
3108 bool claimed;
3109
3110 mutex_lock(&zram->disk->open_mutex);
3111 if (disk_openers(zram->disk)) {
3112 mutex_unlock(&zram->disk->open_mutex);
3113 return -EBUSY;
3114 }
3115
3116 claimed = zram->claim;
3117 if (!claimed)
3118 zram->claim = true;
3119 mutex_unlock(&zram->disk->open_mutex);
3120
3121 zram_debugfs_unregister(zram);
3122
3123 if (claimed) {
3124 /*
3125 * If we were claimed by reset_store(), del_gendisk() will
3126 * wait until reset_store() is done, so nothing need to do.
3127 */
3128 ;
3129 } else {
3130 /* Make sure all the pending I/O are finished */
3131 sync_blockdev(zram->disk->part0);
3132 zram_reset_device(zram);
3133 }
3134
3135 pr_info("Removed device: %s\n", zram->disk->disk_name);
3136
3137 del_gendisk(zram->disk);
3138
3139 /* del_gendisk drains pending reset_store */
3140 WARN_ON_ONCE(claimed && zram->claim);
3141
3142 /*
3143 * disksize_store() may be called in between zram_reset_device()
3144 * and del_gendisk(), so run the last reset to avoid leaking
3145 * anything allocated with disksize_store()
3146 */
3147 zram_reset_device(zram);
3148
3149 put_disk(zram->disk);
3150 kfree(zram);
3151 return 0;
3152 }
3153
3154 /* zram-control sysfs attributes */
3155
3156 /*
3157 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
3158 * sense that reading from this file does alter the state of your system -- it
3159 * creates a new un-initialized zram device and returns back this device's
3160 * device_id (or an error code if it fails to create a new device).
3161 */
hot_add_show(const struct class * class,const struct class_attribute * attr,char * buf)3162 static ssize_t hot_add_show(const struct class *class,
3163 const struct class_attribute *attr,
3164 char *buf)
3165 {
3166 int ret;
3167
3168 mutex_lock(&zram_index_mutex);
3169 ret = zram_add();
3170 mutex_unlock(&zram_index_mutex);
3171
3172 if (ret < 0)
3173 return ret;
3174 return sysfs_emit(buf, "%d\n", ret);
3175 }
/*
 * This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used.
 * It is read-only (no store handler) and backed by hot_add_show().
 */
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);
3179
hot_remove_store(const struct class * class,const struct class_attribute * attr,const char * buf,size_t count)3180 static ssize_t hot_remove_store(const struct class *class,
3181 const struct class_attribute *attr,
3182 const char *buf,
3183 size_t count)
3184 {
3185 struct zram *zram;
3186 int ret, dev_id;
3187
3188 /* dev_id is gendisk->first_minor, which is `int' */
3189 ret = kstrtoint(buf, 10, &dev_id);
3190 if (ret)
3191 return ret;
3192 if (dev_id < 0)
3193 return -EINVAL;
3194
3195 mutex_lock(&zram_index_mutex);
3196
3197 zram = idr_find(&zram_index_idr, dev_id);
3198 if (zram) {
3199 ret = zram_remove(zram);
3200 if (!ret)
3201 idr_remove(&zram_index_idr, dev_id);
3202 } else {
3203 ret = -ENODEV;
3204 }
3205
3206 mutex_unlock(&zram_index_mutex);
3207 return ret ? ret : count;
3208 }
/* Write-only class attribute backed by hot_remove_store() */
static CLASS_ATTR_WO(hot_remove);

/* NULL-terminated list of the zram-control class attributes */
static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

/* The "zram-control" class carrying the hot_add/hot_remove attributes */
static struct class zram_control_class = {
	.name		= "zram-control",
	.class_groups	= zram_control_class_groups,
};
3222
/*
 * idr_for_each() callback: remove the device stored at @id. A failing
 * removal triggers a one-time warning; 0 is returned regardless so that
 * the iteration continues.
 */
static int zram_remove_cb(int id, void *ptr, void *data)
{
	struct zram *zram = ptr;

	WARN_ON_ONCE(zram_remove(zram));

	return 0;
}
3228
/*
 * Undo everything zram_init() set up: the control class, every device still
 * registered in the idr, the debugfs entries, the idr itself, the block
 * major and the CPU hotplug state.
 */
static void destroy_devices(void)
{
	/* Unregister the class with its hot_add/hot_remove attributes first */
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}
3238
zram_init(void)3239 static int __init zram_init(void)
3240 {
3241 struct zram_table_entry zram_te;
3242 int ret;
3243
3244 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8);
3245
3246 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
3247 zcomp_cpu_up_prepare, zcomp_cpu_dead);
3248 if (ret < 0)
3249 return ret;
3250
3251 ret = class_register(&zram_control_class);
3252 if (ret) {
3253 pr_err("Unable to register zram-control class\n");
3254 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3255 return ret;
3256 }
3257
3258 zram_debugfs_create();
3259 zram_major = register_blkdev(0, "zram");
3260 if (zram_major <= 0) {
3261 pr_err("Unable to get major number\n");
3262 class_unregister(&zram_control_class);
3263 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3264 return -EBUSY;
3265 }
3266
3267 while (num_devices != 0) {
3268 mutex_lock(&zram_index_mutex);
3269 ret = zram_add();
3270 mutex_unlock(&zram_index_mutex);
3271 if (ret < 0)
3272 goto out_error;
3273 num_devices--;
3274 }
3275
3276 return 0;
3277
3278 out_error:
3279 destroy_devices();
3280 return ret;
3281 }
3282
/* Module exit: remove all devices and release every module-wide resource. */
static void __exit zram_exit(void)
{
	destroy_devices();
}
3287
/* Module entry and exit points */
module_init(zram_init);
module_exit(zram_exit);

/*
 * num_devices: how many zram devices zram_init() creates up front. The
 * permission argument 0 means the parameter gets no sysfs entry.
 */
module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
3297