xref: /linux/drivers/block/zram/zram_drv.c (revision 32786fdc9506aeba98278c1844d4bfb766863832)
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14 
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/genhd.h>
26 #include <linux/highmem.h>
27 #include <linux/slab.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32 #include <linux/sysfs.h>
33 #include <linux/cpuhotplug.h>
34 
35 #include "zram_drv.h"
36 
37 static DEFINE_IDR(zram_index_idr);
38 /* idr index must be protected */
39 static DEFINE_MUTEX(zram_index_mutex);
40 
41 static int zram_major;
42 static const char *default_compressor = "lzo";
43 
44 /* Module params (documentation at end) */
45 static unsigned int num_devices = 1;
46 
47 static inline void deprecated_attr_warn(const char *name)
48 {
49 	pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
50 			task_pid_nr(current),
51 			current->comm,
52 			name,
53 			"See zram documentation.");
54 }
55 
56 #define ZRAM_ATTR_RO(name)						\
57 static ssize_t name##_show(struct device *d,				\
58 				struct device_attribute *attr, char *b)	\
59 {									\
60 	struct zram *zram = dev_to_zram(d);				\
61 									\
62 	deprecated_attr_warn(__stringify(name));			\
63 	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
64 		(u64)atomic64_read(&zram->stats.name));			\
65 }									\
66 static DEVICE_ATTR_RO(name);
67 
68 static inline bool init_done(struct zram *zram)
69 {
70 	return zram->disksize;
71 }
72 
73 static inline struct zram *dev_to_zram(struct device *dev)
74 {
75 	return (struct zram *)dev_to_disk(dev)->private_data;
76 }
77 
78 /* flag operations require table entry bit_spin_lock() being held */
79 static int zram_test_flag(struct zram_meta *meta, u32 index,
80 			enum zram_pageflags flag)
81 {
82 	return meta->table[index].value & BIT(flag);
83 }
84 
85 static void zram_set_flag(struct zram_meta *meta, u32 index,
86 			enum zram_pageflags flag)
87 {
88 	meta->table[index].value |= BIT(flag);
89 }
90 
91 static void zram_clear_flag(struct zram_meta *meta, u32 index,
92 			enum zram_pageflags flag)
93 {
94 	meta->table[index].value &= ~BIT(flag);
95 }
96 
97 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
98 {
99 	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
100 }
101 
102 static void zram_set_obj_size(struct zram_meta *meta,
103 					u32 index, size_t size)
104 {
105 	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
106 
107 	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
108 }
109 
110 static inline bool is_partial_io(struct bio_vec *bvec)
111 {
112 	return bvec->bv_len != PAGE_SIZE;
113 }
114 
115 /*
116  * Check if request is within bounds and aligned on zram logical blocks.
117  */
118 static inline bool valid_io_request(struct zram *zram,
119 		sector_t start, unsigned int size)
120 {
121 	u64 end, bound;
122 
123 	/* unaligned request */
124 	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
125 		return false;
126 	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
127 		return false;
128 
129 	end = start + (size >> SECTOR_SHIFT);
130 	bound = zram->disksize >> SECTOR_SHIFT;
131 	/* out of range range */
132 	if (unlikely(start >= bound || end > bound || start > end))
133 		return false;
134 
135 	/* I/O request is valid */
136 	return true;
137 }
138 
139 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
140 {
141 	if (*offset + bvec->bv_len >= PAGE_SIZE)
142 		(*index)++;
143 	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
144 }
145 
146 static inline void update_used_max(struct zram *zram,
147 					const unsigned long pages)
148 {
149 	unsigned long old_max, cur_max;
150 
151 	old_max = atomic_long_read(&zram->stats.max_used_pages);
152 
153 	do {
154 		cur_max = old_max;
155 		if (pages > cur_max)
156 			old_max = atomic_long_cmpxchg(
157 				&zram->stats.max_used_pages, cur_max, pages);
158 	} while (old_max != cur_max);
159 }
160 
161 static bool page_zero_filled(void *ptr)
162 {
163 	unsigned int pos;
164 	unsigned long *page;
165 
166 	page = (unsigned long *)ptr;
167 
168 	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
169 		if (page[pos])
170 			return false;
171 	}
172 
173 	return true;
174 }
175 
176 static void handle_zero_page(struct bio_vec *bvec)
177 {
178 	struct page *page = bvec->bv_page;
179 	void *user_mem;
180 
181 	user_mem = kmap_atomic(page);
182 	if (is_partial_io(bvec))
183 		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
184 	else
185 		clear_page(user_mem);
186 	kunmap_atomic(user_mem);
187 
188 	flush_dcache_page(page);
189 }
190 
191 static ssize_t initstate_show(struct device *dev,
192 		struct device_attribute *attr, char *buf)
193 {
194 	u32 val;
195 	struct zram *zram = dev_to_zram(dev);
196 
197 	down_read(&zram->init_lock);
198 	val = init_done(zram);
199 	up_read(&zram->init_lock);
200 
201 	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
202 }
203 
204 static ssize_t disksize_show(struct device *dev,
205 		struct device_attribute *attr, char *buf)
206 {
207 	struct zram *zram = dev_to_zram(dev);
208 
209 	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
210 }
211 
212 static ssize_t orig_data_size_show(struct device *dev,
213 		struct device_attribute *attr, char *buf)
214 {
215 	struct zram *zram = dev_to_zram(dev);
216 
217 	deprecated_attr_warn("orig_data_size");
218 	return scnprintf(buf, PAGE_SIZE, "%llu\n",
219 		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
220 }
221 
222 static ssize_t mem_used_total_show(struct device *dev,
223 		struct device_attribute *attr, char *buf)
224 {
225 	u64 val = 0;
226 	struct zram *zram = dev_to_zram(dev);
227 
228 	deprecated_attr_warn("mem_used_total");
229 	down_read(&zram->init_lock);
230 	if (init_done(zram)) {
231 		struct zram_meta *meta = zram->meta;
232 		val = zs_get_total_pages(meta->mem_pool);
233 	}
234 	up_read(&zram->init_lock);
235 
236 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
237 }
238 
239 static ssize_t mem_limit_show(struct device *dev,
240 		struct device_attribute *attr, char *buf)
241 {
242 	u64 val;
243 	struct zram *zram = dev_to_zram(dev);
244 
245 	deprecated_attr_warn("mem_limit");
246 	down_read(&zram->init_lock);
247 	val = zram->limit_pages;
248 	up_read(&zram->init_lock);
249 
250 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
251 }
252 
253 static ssize_t mem_limit_store(struct device *dev,
254 		struct device_attribute *attr, const char *buf, size_t len)
255 {
256 	u64 limit;
257 	char *tmp;
258 	struct zram *zram = dev_to_zram(dev);
259 
260 	limit = memparse(buf, &tmp);
261 	if (buf == tmp) /* no chars parsed, invalid input */
262 		return -EINVAL;
263 
264 	down_write(&zram->init_lock);
265 	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
266 	up_write(&zram->init_lock);
267 
268 	return len;
269 }
270 
271 static ssize_t mem_used_max_show(struct device *dev,
272 		struct device_attribute *attr, char *buf)
273 {
274 	u64 val = 0;
275 	struct zram *zram = dev_to_zram(dev);
276 
277 	deprecated_attr_warn("mem_used_max");
278 	down_read(&zram->init_lock);
279 	if (init_done(zram))
280 		val = atomic_long_read(&zram->stats.max_used_pages);
281 	up_read(&zram->init_lock);
282 
283 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
284 }
285 
286 static ssize_t mem_used_max_store(struct device *dev,
287 		struct device_attribute *attr, const char *buf, size_t len)
288 {
289 	int err;
290 	unsigned long val;
291 	struct zram *zram = dev_to_zram(dev);
292 
293 	err = kstrtoul(buf, 10, &val);
294 	if (err || val != 0)
295 		return -EINVAL;
296 
297 	down_read(&zram->init_lock);
298 	if (init_done(zram)) {
299 		struct zram_meta *meta = zram->meta;
300 		atomic_long_set(&zram->stats.max_used_pages,
301 				zs_get_total_pages(meta->mem_pool));
302 	}
303 	up_read(&zram->init_lock);
304 
305 	return len;
306 }
307 
308 /*
309  * We switched to per-cpu streams and this attr is not needed anymore.
310  * However, we will keep it around for some time, because:
311  * a) we may revert per-cpu streams in the future
312  * b) it's visible to user space and we need to follow our 2 years
313  *    retirement rule; but we already have a number of 'soon to be
314  *    altered' attrs, so max_comp_streams need to wait for the next
315  *    layoff cycle.
316  */
317 static ssize_t max_comp_streams_show(struct device *dev,
318 		struct device_attribute *attr, char *buf)
319 {
320 	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
321 }
322 
323 static ssize_t max_comp_streams_store(struct device *dev,
324 		struct device_attribute *attr, const char *buf, size_t len)
325 {
326 	return len;
327 }
328 
329 static ssize_t comp_algorithm_show(struct device *dev,
330 		struct device_attribute *attr, char *buf)
331 {
332 	size_t sz;
333 	struct zram *zram = dev_to_zram(dev);
334 
335 	down_read(&zram->init_lock);
336 	sz = zcomp_available_show(zram->compressor, buf);
337 	up_read(&zram->init_lock);
338 
339 	return sz;
340 }
341 
342 static ssize_t comp_algorithm_store(struct device *dev,
343 		struct device_attribute *attr, const char *buf, size_t len)
344 {
345 	struct zram *zram = dev_to_zram(dev);
346 	char compressor[CRYPTO_MAX_ALG_NAME];
347 	size_t sz;
348 
349 	strlcpy(compressor, buf, sizeof(compressor));
350 	/* ignore trailing newline */
351 	sz = strlen(compressor);
352 	if (sz > 0 && compressor[sz - 1] == '\n')
353 		compressor[sz - 1] = 0x00;
354 
355 	if (!zcomp_available_algorithm(compressor))
356 		return -EINVAL;
357 
358 	down_write(&zram->init_lock);
359 	if (init_done(zram)) {
360 		up_write(&zram->init_lock);
361 		pr_info("Can't change algorithm for initialized device\n");
362 		return -EBUSY;
363 	}
364 
365 	strlcpy(zram->compressor, compressor, sizeof(compressor));
366 	up_write(&zram->init_lock);
367 	return len;
368 }
369 
370 static ssize_t compact_store(struct device *dev,
371 		struct device_attribute *attr, const char *buf, size_t len)
372 {
373 	struct zram *zram = dev_to_zram(dev);
374 	struct zram_meta *meta;
375 
376 	down_read(&zram->init_lock);
377 	if (!init_done(zram)) {
378 		up_read(&zram->init_lock);
379 		return -EINVAL;
380 	}
381 
382 	meta = zram->meta;
383 	zs_compact(meta->mem_pool);
384 	up_read(&zram->init_lock);
385 
386 	return len;
387 }
388 
389 static ssize_t io_stat_show(struct device *dev,
390 		struct device_attribute *attr, char *buf)
391 {
392 	struct zram *zram = dev_to_zram(dev);
393 	ssize_t ret;
394 
395 	down_read(&zram->init_lock);
396 	ret = scnprintf(buf, PAGE_SIZE,
397 			"%8llu %8llu %8llu %8llu\n",
398 			(u64)atomic64_read(&zram->stats.failed_reads),
399 			(u64)atomic64_read(&zram->stats.failed_writes),
400 			(u64)atomic64_read(&zram->stats.invalid_io),
401 			(u64)atomic64_read(&zram->stats.notify_free));
402 	up_read(&zram->init_lock);
403 
404 	return ret;
405 }
406 
407 static ssize_t mm_stat_show(struct device *dev,
408 		struct device_attribute *attr, char *buf)
409 {
410 	struct zram *zram = dev_to_zram(dev);
411 	struct zs_pool_stats pool_stats;
412 	u64 orig_size, mem_used = 0;
413 	long max_used;
414 	ssize_t ret;
415 
416 	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
417 
418 	down_read(&zram->init_lock);
419 	if (init_done(zram)) {
420 		mem_used = zs_get_total_pages(zram->meta->mem_pool);
421 		zs_pool_stats(zram->meta->mem_pool, &pool_stats);
422 	}
423 
424 	orig_size = atomic64_read(&zram->stats.pages_stored);
425 	max_used = atomic_long_read(&zram->stats.max_used_pages);
426 
427 	ret = scnprintf(buf, PAGE_SIZE,
428 			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
429 			orig_size << PAGE_SHIFT,
430 			(u64)atomic64_read(&zram->stats.compr_data_size),
431 			mem_used << PAGE_SHIFT,
432 			zram->limit_pages << PAGE_SHIFT,
433 			max_used << PAGE_SHIFT,
434 			(u64)atomic64_read(&zram->stats.zero_pages),
435 			pool_stats.pages_compacted);
436 	up_read(&zram->init_lock);
437 
438 	return ret;
439 }
440 
441 static ssize_t debug_stat_show(struct device *dev,
442 		struct device_attribute *attr, char *buf)
443 {
444 	int version = 1;
445 	struct zram *zram = dev_to_zram(dev);
446 	ssize_t ret;
447 
448 	down_read(&zram->init_lock);
449 	ret = scnprintf(buf, PAGE_SIZE,
450 			"version: %d\n%8llu\n",
451 			version,
452 			(u64)atomic64_read(&zram->stats.writestall));
453 	up_read(&zram->init_lock);
454 
455 	return ret;
456 }
457 
458 static DEVICE_ATTR_RO(io_stat);
459 static DEVICE_ATTR_RO(mm_stat);
460 static DEVICE_ATTR_RO(debug_stat);
461 ZRAM_ATTR_RO(num_reads);
462 ZRAM_ATTR_RO(num_writes);
463 ZRAM_ATTR_RO(failed_reads);
464 ZRAM_ATTR_RO(failed_writes);
465 ZRAM_ATTR_RO(invalid_io);
466 ZRAM_ATTR_RO(notify_free);
467 ZRAM_ATTR_RO(zero_pages);
468 ZRAM_ATTR_RO(compr_data_size);
469 
470 static inline bool zram_meta_get(struct zram *zram)
471 {
472 	if (atomic_inc_not_zero(&zram->refcount))
473 		return true;
474 	return false;
475 }
476 
477 static inline void zram_meta_put(struct zram *zram)
478 {
479 	atomic_dec(&zram->refcount);
480 }
481 
482 static void zram_meta_free(struct zram_meta *meta, u64 disksize)
483 {
484 	size_t num_pages = disksize >> PAGE_SHIFT;
485 	size_t index;
486 
487 	/* Free all pages that are still in this zram device */
488 	for (index = 0; index < num_pages; index++) {
489 		unsigned long handle = meta->table[index].handle;
490 
491 		if (!handle)
492 			continue;
493 
494 		zs_free(meta->mem_pool, handle);
495 	}
496 
497 	zs_destroy_pool(meta->mem_pool);
498 	vfree(meta->table);
499 	kfree(meta);
500 }
501 
502 static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
503 {
504 	size_t num_pages;
505 	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
506 
507 	if (!meta)
508 		return NULL;
509 
510 	num_pages = disksize >> PAGE_SHIFT;
511 	meta->table = vzalloc(num_pages * sizeof(*meta->table));
512 	if (!meta->table) {
513 		pr_err("Error allocating zram address table\n");
514 		goto out_error;
515 	}
516 
517 	meta->mem_pool = zs_create_pool(pool_name);
518 	if (!meta->mem_pool) {
519 		pr_err("Error creating memory pool\n");
520 		goto out_error;
521 	}
522 
523 	return meta;
524 
525 out_error:
526 	vfree(meta->table);
527 	kfree(meta);
528 	return NULL;
529 }
530 
531 /*
532  * To protect concurrent access to the same index entry,
533  * caller should hold this table index entry's bit_spinlock to
534  * indicate this index entry is accessing.
535  */
536 static void zram_free_page(struct zram *zram, size_t index)
537 {
538 	struct zram_meta *meta = zram->meta;
539 	unsigned long handle = meta->table[index].handle;
540 
541 	if (unlikely(!handle)) {
542 		/*
543 		 * No memory is allocated for zero filled pages.
544 		 * Simply clear zero page flag.
545 		 */
546 		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
547 			zram_clear_flag(meta, index, ZRAM_ZERO);
548 			atomic64_dec(&zram->stats.zero_pages);
549 		}
550 		return;
551 	}
552 
553 	zs_free(meta->mem_pool, handle);
554 
555 	atomic64_sub(zram_get_obj_size(meta, index),
556 			&zram->stats.compr_data_size);
557 	atomic64_dec(&zram->stats.pages_stored);
558 
559 	meta->table[index].handle = 0;
560 	zram_set_obj_size(meta, index, 0);
561 }
562 
563 static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
564 {
565 	int ret = 0;
566 	unsigned char *cmem;
567 	struct zram_meta *meta = zram->meta;
568 	unsigned long handle;
569 	unsigned int size;
570 
571 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
572 	handle = meta->table[index].handle;
573 	size = zram_get_obj_size(meta, index);
574 
575 	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
576 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
577 		clear_page(mem);
578 		return 0;
579 	}
580 
581 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
582 	if (size == PAGE_SIZE) {
583 		copy_page(mem, cmem);
584 	} else {
585 		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
586 
587 		ret = zcomp_decompress(zstrm, cmem, size, mem);
588 		zcomp_stream_put(zram->comp);
589 	}
590 	zs_unmap_object(meta->mem_pool, handle);
591 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
592 
593 	/* Should NEVER happen. Return bio error if it does. */
594 	if (unlikely(ret)) {
595 		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
596 		return ret;
597 	}
598 
599 	return 0;
600 }
601 
602 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
603 			  u32 index, int offset)
604 {
605 	int ret;
606 	struct page *page;
607 	unsigned char *user_mem, *uncmem = NULL;
608 	struct zram_meta *meta = zram->meta;
609 	page = bvec->bv_page;
610 
611 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
612 	if (unlikely(!meta->table[index].handle) ||
613 			zram_test_flag(meta, index, ZRAM_ZERO)) {
614 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
615 		handle_zero_page(bvec);
616 		return 0;
617 	}
618 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
619 
620 	if (is_partial_io(bvec))
621 		/* Use  a temporary buffer to decompress the page */
622 		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
623 
624 	user_mem = kmap_atomic(page);
625 	if (!is_partial_io(bvec))
626 		uncmem = user_mem;
627 
628 	if (!uncmem) {
629 		pr_err("Unable to allocate temp memory\n");
630 		ret = -ENOMEM;
631 		goto out_cleanup;
632 	}
633 
634 	ret = zram_decompress_page(zram, uncmem, index);
635 	/* Should NEVER happen. Return bio error if it does. */
636 	if (unlikely(ret))
637 		goto out_cleanup;
638 
639 	if (is_partial_io(bvec))
640 		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
641 				bvec->bv_len);
642 
643 	flush_dcache_page(page);
644 	ret = 0;
645 out_cleanup:
646 	kunmap_atomic(user_mem);
647 	if (is_partial_io(bvec))
648 		kfree(uncmem);
649 	return ret;
650 }
651 
652 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
653 			   int offset)
654 {
655 	int ret = 0;
656 	unsigned int clen;
657 	unsigned long handle = 0;
658 	struct page *page;
659 	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
660 	struct zram_meta *meta = zram->meta;
661 	struct zcomp_strm *zstrm = NULL;
662 	unsigned long alloced_pages;
663 
664 	page = bvec->bv_page;
665 	if (is_partial_io(bvec)) {
666 		/*
667 		 * This is a partial IO. We need to read the full page
668 		 * before to write the changes.
669 		 */
670 		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
671 		if (!uncmem) {
672 			ret = -ENOMEM;
673 			goto out;
674 		}
675 		ret = zram_decompress_page(zram, uncmem, index);
676 		if (ret)
677 			goto out;
678 	}
679 
680 compress_again:
681 	user_mem = kmap_atomic(page);
682 	if (is_partial_io(bvec)) {
683 		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
684 		       bvec->bv_len);
685 		kunmap_atomic(user_mem);
686 		user_mem = NULL;
687 	} else {
688 		uncmem = user_mem;
689 	}
690 
691 	if (page_zero_filled(uncmem)) {
692 		if (user_mem)
693 			kunmap_atomic(user_mem);
694 		/* Free memory associated with this sector now. */
695 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
696 		zram_free_page(zram, index);
697 		zram_set_flag(meta, index, ZRAM_ZERO);
698 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
699 
700 		atomic64_inc(&zram->stats.zero_pages);
701 		ret = 0;
702 		goto out;
703 	}
704 
705 	zstrm = zcomp_stream_get(zram->comp);
706 	ret = zcomp_compress(zstrm, uncmem, &clen);
707 	if (!is_partial_io(bvec)) {
708 		kunmap_atomic(user_mem);
709 		user_mem = NULL;
710 		uncmem = NULL;
711 	}
712 
713 	if (unlikely(ret)) {
714 		pr_err("Compression failed! err=%d\n", ret);
715 		goto out;
716 	}
717 
718 	src = zstrm->buffer;
719 	if (unlikely(clen > max_zpage_size)) {
720 		clen = PAGE_SIZE;
721 		if (is_partial_io(bvec))
722 			src = uncmem;
723 	}
724 
725 	/*
726 	 * handle allocation has 2 paths:
727 	 * a) fast path is executed with preemption disabled (for
728 	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
729 	 *  since we can't sleep;
730 	 * b) slow path enables preemption and attempts to allocate
731 	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
732 	 *  put per-cpu compression stream and, thus, to re-do
733 	 *  the compression once handle is allocated.
734 	 *
735 	 * if we have a 'non-null' handle here then we are coming
736 	 * from the slow path and handle has already been allocated.
737 	 */
738 	if (!handle)
739 		handle = zs_malloc(meta->mem_pool, clen,
740 				__GFP_KSWAPD_RECLAIM |
741 				__GFP_NOWARN |
742 				__GFP_HIGHMEM |
743 				__GFP_MOVABLE);
744 	if (!handle) {
745 		zcomp_stream_put(zram->comp);
746 		zstrm = NULL;
747 
748 		atomic64_inc(&zram->stats.writestall);
749 
750 		handle = zs_malloc(meta->mem_pool, clen,
751 				GFP_NOIO | __GFP_HIGHMEM |
752 				__GFP_MOVABLE);
753 		if (handle)
754 			goto compress_again;
755 
756 		pr_err("Error allocating memory for compressed page: %u, size=%u\n",
757 			index, clen);
758 		ret = -ENOMEM;
759 		goto out;
760 	}
761 
762 	alloced_pages = zs_get_total_pages(meta->mem_pool);
763 	update_used_max(zram, alloced_pages);
764 
765 	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
766 		zs_free(meta->mem_pool, handle);
767 		ret = -ENOMEM;
768 		goto out;
769 	}
770 
771 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
772 
773 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
774 		src = kmap_atomic(page);
775 		copy_page(cmem, src);
776 		kunmap_atomic(src);
777 	} else {
778 		memcpy(cmem, src, clen);
779 	}
780 
781 	zcomp_stream_put(zram->comp);
782 	zstrm = NULL;
783 	zs_unmap_object(meta->mem_pool, handle);
784 
785 	/*
786 	 * Free memory associated with this sector
787 	 * before overwriting unused sectors.
788 	 */
789 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
790 	zram_free_page(zram, index);
791 
792 	meta->table[index].handle = handle;
793 	zram_set_obj_size(meta, index, clen);
794 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
795 
796 	/* Update stats */
797 	atomic64_add(clen, &zram->stats.compr_data_size);
798 	atomic64_inc(&zram->stats.pages_stored);
799 out:
800 	if (zstrm)
801 		zcomp_stream_put(zram->comp);
802 	if (is_partial_io(bvec))
803 		kfree(uncmem);
804 	return ret;
805 }
806 
807 /*
808  * zram_bio_discard - handler on discard request
809  * @index: physical block index in PAGE_SIZE units
810  * @offset: byte offset within physical block
811  */
812 static void zram_bio_discard(struct zram *zram, u32 index,
813 			     int offset, struct bio *bio)
814 {
815 	size_t n = bio->bi_iter.bi_size;
816 	struct zram_meta *meta = zram->meta;
817 
818 	/*
819 	 * zram manages data in physical block size units. Because logical block
820 	 * size isn't identical with physical block size on some arch, we
821 	 * could get a discard request pointing to a specific offset within a
822 	 * certain physical block.  Although we can handle this request by
823 	 * reading that physiclal block and decompressing and partially zeroing
824 	 * and re-compressing and then re-storing it, this isn't reasonable
825 	 * because our intent with a discard request is to save memory.  So
826 	 * skipping this logical block is appropriate here.
827 	 */
828 	if (offset) {
829 		if (n <= (PAGE_SIZE - offset))
830 			return;
831 
832 		n -= (PAGE_SIZE - offset);
833 		index++;
834 	}
835 
836 	while (n >= PAGE_SIZE) {
837 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
838 		zram_free_page(zram, index);
839 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
840 		atomic64_inc(&zram->stats.notify_free);
841 		index++;
842 		n -= PAGE_SIZE;
843 	}
844 }
845 
846 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
847 			int offset, bool is_write)
848 {
849 	unsigned long start_time = jiffies;
850 	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
851 	int ret;
852 
853 	generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
854 			&zram->disk->part0);
855 
856 	if (!is_write) {
857 		atomic64_inc(&zram->stats.num_reads);
858 		ret = zram_bvec_read(zram, bvec, index, offset);
859 	} else {
860 		atomic64_inc(&zram->stats.num_writes);
861 		ret = zram_bvec_write(zram, bvec, index, offset);
862 	}
863 
864 	generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
865 
866 	if (unlikely(ret)) {
867 		if (!is_write)
868 			atomic64_inc(&zram->stats.failed_reads);
869 		else
870 			atomic64_inc(&zram->stats.failed_writes);
871 	}
872 
873 	return ret;
874 }
875 
876 static void __zram_make_request(struct zram *zram, struct bio *bio)
877 {
878 	int offset;
879 	u32 index;
880 	struct bio_vec bvec;
881 	struct bvec_iter iter;
882 
883 	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
884 	offset = (bio->bi_iter.bi_sector &
885 		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
886 
887 	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
888 		zram_bio_discard(zram, index, offset, bio);
889 		bio_endio(bio);
890 		return;
891 	}
892 
893 	bio_for_each_segment(bvec, bio, iter) {
894 		int max_transfer_size = PAGE_SIZE - offset;
895 
896 		if (bvec.bv_len > max_transfer_size) {
897 			/*
898 			 * zram_bvec_rw() can only make operation on a single
899 			 * zram page. Split the bio vector.
900 			 */
901 			struct bio_vec bv;
902 
903 			bv.bv_page = bvec.bv_page;
904 			bv.bv_len = max_transfer_size;
905 			bv.bv_offset = bvec.bv_offset;
906 
907 			if (zram_bvec_rw(zram, &bv, index, offset,
908 					 op_is_write(bio_op(bio))) < 0)
909 				goto out;
910 
911 			bv.bv_len = bvec.bv_len - max_transfer_size;
912 			bv.bv_offset += max_transfer_size;
913 			if (zram_bvec_rw(zram, &bv, index + 1, 0,
914 					 op_is_write(bio_op(bio))) < 0)
915 				goto out;
916 		} else
917 			if (zram_bvec_rw(zram, &bvec, index, offset,
918 					 op_is_write(bio_op(bio))) < 0)
919 				goto out;
920 
921 		update_position(&index, &offset, &bvec);
922 	}
923 
924 	bio_endio(bio);
925 	return;
926 
927 out:
928 	bio_io_error(bio);
929 }
930 
931 /*
932  * Handler function for all zram I/O requests.
933  */
934 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
935 {
936 	struct zram *zram = queue->queuedata;
937 
938 	if (unlikely(!zram_meta_get(zram)))
939 		goto error;
940 
941 	blk_queue_split(queue, &bio, queue->bio_split);
942 
943 	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
944 					bio->bi_iter.bi_size)) {
945 		atomic64_inc(&zram->stats.invalid_io);
946 		goto put_zram;
947 	}
948 
949 	__zram_make_request(zram, bio);
950 	zram_meta_put(zram);
951 	return BLK_QC_T_NONE;
952 put_zram:
953 	zram_meta_put(zram);
954 error:
955 	bio_io_error(bio);
956 	return BLK_QC_T_NONE;
957 }
958 
959 static void zram_slot_free_notify(struct block_device *bdev,
960 				unsigned long index)
961 {
962 	struct zram *zram;
963 	struct zram_meta *meta;
964 
965 	zram = bdev->bd_disk->private_data;
966 	meta = zram->meta;
967 
968 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
969 	zram_free_page(zram, index);
970 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
971 	atomic64_inc(&zram->stats.notify_free);
972 }
973 
974 static int zram_rw_page(struct block_device *bdev, sector_t sector,
975 		       struct page *page, bool is_write)
976 {
977 	int offset, err = -EIO;
978 	u32 index;
979 	struct zram *zram;
980 	struct bio_vec bv;
981 
982 	zram = bdev->bd_disk->private_data;
983 	if (unlikely(!zram_meta_get(zram)))
984 		goto out;
985 
986 	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
987 		atomic64_inc(&zram->stats.invalid_io);
988 		err = -EINVAL;
989 		goto put_zram;
990 	}
991 
992 	index = sector >> SECTORS_PER_PAGE_SHIFT;
993 	offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
994 
995 	bv.bv_page = page;
996 	bv.bv_len = PAGE_SIZE;
997 	bv.bv_offset = 0;
998 
999 	err = zram_bvec_rw(zram, &bv, index, offset, is_write);
1000 put_zram:
1001 	zram_meta_put(zram);
1002 out:
1003 	/*
1004 	 * If I/O fails, just return error(ie, non-zero) without
1005 	 * calling page_endio.
1006 	 * It causes resubmit the I/O with bio request by upper functions
1007 	 * of rw_page(e.g., swap_readpage, __swap_writepage) and
1008 	 * bio->bi_end_io does things to handle the error
1009 	 * (e.g., SetPageError, set_page_dirty and extra works).
1010 	 */
1011 	if (err == 0)
1012 		page_endio(page, is_write, 0);
1013 	return err;
1014 }
1015 
1016 static void zram_reset_device(struct zram *zram)
1017 {
1018 	struct zram_meta *meta;
1019 	struct zcomp *comp;
1020 	u64 disksize;
1021 
1022 	down_write(&zram->init_lock);
1023 
1024 	zram->limit_pages = 0;
1025 
1026 	if (!init_done(zram)) {
1027 		up_write(&zram->init_lock);
1028 		return;
1029 	}
1030 
1031 	meta = zram->meta;
1032 	comp = zram->comp;
1033 	disksize = zram->disksize;
1034 	/*
1035 	 * Refcount will go down to 0 eventually and r/w handler
1036 	 * cannot handle further I/O so it will bail out by
1037 	 * check zram_meta_get.
1038 	 */
1039 	zram_meta_put(zram);
1040 	/*
1041 	 * We want to free zram_meta in process context to avoid
1042 	 * deadlock between reclaim path and any other locks.
1043 	 */
1044 	wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
1045 
1046 	/* Reset stats */
1047 	memset(&zram->stats, 0, sizeof(zram->stats));
1048 	zram->disksize = 0;
1049 
1050 	set_capacity(zram->disk, 0);
1051 	part_stat_set_all(&zram->disk->part0, 0);
1052 
1053 	up_write(&zram->init_lock);
1054 	/* I/O operation under all of CPU are done so let's free */
1055 	zram_meta_free(meta, disksize);
1056 	zcomp_destroy(comp);
1057 }
1058 
1059 static ssize_t disksize_store(struct device *dev,
1060 		struct device_attribute *attr, const char *buf, size_t len)
1061 {
1062 	u64 disksize;
1063 	struct zcomp *comp;
1064 	struct zram_meta *meta;
1065 	struct zram *zram = dev_to_zram(dev);
1066 	int err;
1067 
1068 	disksize = memparse(buf, NULL);
1069 	if (!disksize)
1070 		return -EINVAL;
1071 
1072 	disksize = PAGE_ALIGN(disksize);
1073 	meta = zram_meta_alloc(zram->disk->disk_name, disksize);
1074 	if (!meta)
1075 		return -ENOMEM;
1076 
1077 	comp = zcomp_create(zram->compressor);
1078 	if (IS_ERR(comp)) {
1079 		pr_err("Cannot initialise %s compressing backend\n",
1080 				zram->compressor);
1081 		err = PTR_ERR(comp);
1082 		goto out_free_meta;
1083 	}
1084 
1085 	down_write(&zram->init_lock);
1086 	if (init_done(zram)) {
1087 		pr_info("Cannot change disksize for initialized device\n");
1088 		err = -EBUSY;
1089 		goto out_destroy_comp;
1090 	}
1091 
1092 	init_waitqueue_head(&zram->io_done);
1093 	atomic_set(&zram->refcount, 1);
1094 	zram->meta = meta;
1095 	zram->comp = comp;
1096 	zram->disksize = disksize;
1097 	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1098 	up_write(&zram->init_lock);
1099 
1100 	/*
1101 	 * Revalidate disk out of the init_lock to avoid lockdep splat.
1102 	 * It's okay because disk's capacity is protected by init_lock
1103 	 * so that revalidate_disk always sees up-to-date capacity.
1104 	 */
1105 	revalidate_disk(zram->disk);
1106 
1107 	return len;
1108 
1109 out_destroy_comp:
1110 	up_write(&zram->init_lock);
1111 	zcomp_destroy(comp);
1112 out_free_meta:
1113 	zram_meta_free(meta, disksize);
1114 	return err;
1115 }
1116 
1117 static ssize_t reset_store(struct device *dev,
1118 		struct device_attribute *attr, const char *buf, size_t len)
1119 {
1120 	int ret;
1121 	unsigned short do_reset;
1122 	struct zram *zram;
1123 	struct block_device *bdev;
1124 
1125 	ret = kstrtou16(buf, 10, &do_reset);
1126 	if (ret)
1127 		return ret;
1128 
1129 	if (!do_reset)
1130 		return -EINVAL;
1131 
1132 	zram = dev_to_zram(dev);
1133 	bdev = bdget_disk(zram->disk, 0);
1134 	if (!bdev)
1135 		return -ENOMEM;
1136 
1137 	mutex_lock(&bdev->bd_mutex);
1138 	/* Do not reset an active device or claimed device */
1139 	if (bdev->bd_openers || zram->claim) {
1140 		mutex_unlock(&bdev->bd_mutex);
1141 		bdput(bdev);
1142 		return -EBUSY;
1143 	}
1144 
1145 	/* From now on, anyone can't open /dev/zram[0-9] */
1146 	zram->claim = true;
1147 	mutex_unlock(&bdev->bd_mutex);
1148 
1149 	/* Make sure all the pending I/O are finished */
1150 	fsync_bdev(bdev);
1151 	zram_reset_device(zram);
1152 	revalidate_disk(zram->disk);
1153 	bdput(bdev);
1154 
1155 	mutex_lock(&bdev->bd_mutex);
1156 	zram->claim = false;
1157 	mutex_unlock(&bdev->bd_mutex);
1158 
1159 	return len;
1160 }
1161 
1162 static int zram_open(struct block_device *bdev, fmode_t mode)
1163 {
1164 	int ret = 0;
1165 	struct zram *zram;
1166 
1167 	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1168 
1169 	zram = bdev->bd_disk->private_data;
1170 	/* zram was claimed to reset so open request fails */
1171 	if (zram->claim)
1172 		ret = -EBUSY;
1173 
1174 	return ret;
1175 }
1176 
1177 static const struct block_device_operations zram_devops = {
1178 	.open = zram_open,
1179 	.swap_slot_free_notify = zram_slot_free_notify,
1180 	.rw_page = zram_rw_page,
1181 	.owner = THIS_MODULE
1182 };
1183 
1184 static DEVICE_ATTR_WO(compact);
1185 static DEVICE_ATTR_RW(disksize);
1186 static DEVICE_ATTR_RO(initstate);
1187 static DEVICE_ATTR_WO(reset);
1188 static DEVICE_ATTR_RO(orig_data_size);
1189 static DEVICE_ATTR_RO(mem_used_total);
1190 static DEVICE_ATTR_RW(mem_limit);
1191 static DEVICE_ATTR_RW(mem_used_max);
1192 static DEVICE_ATTR_RW(max_comp_streams);
1193 static DEVICE_ATTR_RW(comp_algorithm);
1194 
1195 static struct attribute *zram_disk_attrs[] = {
1196 	&dev_attr_disksize.attr,
1197 	&dev_attr_initstate.attr,
1198 	&dev_attr_reset.attr,
1199 	&dev_attr_num_reads.attr,
1200 	&dev_attr_num_writes.attr,
1201 	&dev_attr_failed_reads.attr,
1202 	&dev_attr_failed_writes.attr,
1203 	&dev_attr_compact.attr,
1204 	&dev_attr_invalid_io.attr,
1205 	&dev_attr_notify_free.attr,
1206 	&dev_attr_zero_pages.attr,
1207 	&dev_attr_orig_data_size.attr,
1208 	&dev_attr_compr_data_size.attr,
1209 	&dev_attr_mem_used_total.attr,
1210 	&dev_attr_mem_limit.attr,
1211 	&dev_attr_mem_used_max.attr,
1212 	&dev_attr_max_comp_streams.attr,
1213 	&dev_attr_comp_algorithm.attr,
1214 	&dev_attr_io_stat.attr,
1215 	&dev_attr_mm_stat.attr,
1216 	&dev_attr_debug_stat.attr,
1217 	NULL,
1218 };
1219 
1220 static struct attribute_group zram_disk_attr_group = {
1221 	.attrs = zram_disk_attrs,
1222 };
1223 
1224 /*
1225  * Allocate and initialize new zram device. the function returns
1226  * '>= 0' device_id upon success, and negative value otherwise.
1227  */
1228 static int zram_add(void)
1229 {
1230 	struct zram *zram;
1231 	struct request_queue *queue;
1232 	int ret, device_id;
1233 
1234 	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1235 	if (!zram)
1236 		return -ENOMEM;
1237 
1238 	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1239 	if (ret < 0)
1240 		goto out_free_dev;
1241 	device_id = ret;
1242 
1243 	init_rwsem(&zram->init_lock);
1244 
1245 	queue = blk_alloc_queue(GFP_KERNEL);
1246 	if (!queue) {
1247 		pr_err("Error allocating disk queue for device %d\n",
1248 			device_id);
1249 		ret = -ENOMEM;
1250 		goto out_free_idr;
1251 	}
1252 
1253 	blk_queue_make_request(queue, zram_make_request);
1254 
1255 	/* gendisk structure */
1256 	zram->disk = alloc_disk(1);
1257 	if (!zram->disk) {
1258 		pr_err("Error allocating disk structure for device %d\n",
1259 			device_id);
1260 		ret = -ENOMEM;
1261 		goto out_free_queue;
1262 	}
1263 
1264 	zram->disk->major = zram_major;
1265 	zram->disk->first_minor = device_id;
1266 	zram->disk->fops = &zram_devops;
1267 	zram->disk->queue = queue;
1268 	zram->disk->queue->queuedata = zram;
1269 	zram->disk->private_data = zram;
1270 	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1271 
1272 	/* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
1273 	set_capacity(zram->disk, 0);
1274 	/* zram devices sort of resembles non-rotational disks */
1275 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
1276 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1277 	/*
1278 	 * To ensure that we always get PAGE_SIZE aligned
1279 	 * and n*PAGE_SIZED sized I/O requests.
1280 	 */
1281 	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1282 	blk_queue_logical_block_size(zram->disk->queue,
1283 					ZRAM_LOGICAL_BLOCK_SIZE);
1284 	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1285 	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1286 	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1287 	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1288 	/*
1289 	 * zram_bio_discard() will clear all logical blocks if logical block
1290 	 * size is identical with physical block size(PAGE_SIZE). But if it is
1291 	 * different, we will skip discarding some parts of logical blocks in
1292 	 * the part of the request range which isn't aligned to physical block
1293 	 * size.  So we can't ensure that all discarded logical blocks are
1294 	 * zeroed.
1295 	 */
1296 	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1297 		zram->disk->queue->limits.discard_zeroes_data = 1;
1298 	else
1299 		zram->disk->queue->limits.discard_zeroes_data = 0;
1300 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
1301 
1302 	add_disk(zram->disk);
1303 
1304 	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
1305 				&zram_disk_attr_group);
1306 	if (ret < 0) {
1307 		pr_err("Error creating sysfs group for device %d\n",
1308 				device_id);
1309 		goto out_free_disk;
1310 	}
1311 	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1312 	zram->meta = NULL;
1313 
1314 	pr_info("Added device: %s\n", zram->disk->disk_name);
1315 	return device_id;
1316 
1317 out_free_disk:
1318 	del_gendisk(zram->disk);
1319 	put_disk(zram->disk);
1320 out_free_queue:
1321 	blk_cleanup_queue(queue);
1322 out_free_idr:
1323 	idr_remove(&zram_index_idr, device_id);
1324 out_free_dev:
1325 	kfree(zram);
1326 	return ret;
1327 }
1328 
1329 static int zram_remove(struct zram *zram)
1330 {
1331 	struct block_device *bdev;
1332 
1333 	bdev = bdget_disk(zram->disk, 0);
1334 	if (!bdev)
1335 		return -ENOMEM;
1336 
1337 	mutex_lock(&bdev->bd_mutex);
1338 	if (bdev->bd_openers || zram->claim) {
1339 		mutex_unlock(&bdev->bd_mutex);
1340 		bdput(bdev);
1341 		return -EBUSY;
1342 	}
1343 
1344 	zram->claim = true;
1345 	mutex_unlock(&bdev->bd_mutex);
1346 
1347 	/*
1348 	 * Remove sysfs first, so no one will perform a disksize
1349 	 * store while we destroy the devices. This also helps during
1350 	 * hot_remove -- zram_reset_device() is the last holder of
1351 	 * ->init_lock, no later/concurrent disksize_store() or any
1352 	 * other sysfs handlers are possible.
1353 	 */
1354 	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
1355 			&zram_disk_attr_group);
1356 
1357 	/* Make sure all the pending I/O are finished */
1358 	fsync_bdev(bdev);
1359 	zram_reset_device(zram);
1360 	bdput(bdev);
1361 
1362 	pr_info("Removed device: %s\n", zram->disk->disk_name);
1363 
1364 	blk_cleanup_queue(zram->disk->queue);
1365 	del_gendisk(zram->disk);
1366 	put_disk(zram->disk);
1367 	kfree(zram);
1368 	return 0;
1369 }
1370 
1371 /* zram-control sysfs attributes */
1372 static ssize_t hot_add_show(struct class *class,
1373 			struct class_attribute *attr,
1374 			char *buf)
1375 {
1376 	int ret;
1377 
1378 	mutex_lock(&zram_index_mutex);
1379 	ret = zram_add();
1380 	mutex_unlock(&zram_index_mutex);
1381 
1382 	if (ret < 0)
1383 		return ret;
1384 	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
1385 }
1386 
1387 static ssize_t hot_remove_store(struct class *class,
1388 			struct class_attribute *attr,
1389 			const char *buf,
1390 			size_t count)
1391 {
1392 	struct zram *zram;
1393 	int ret, dev_id;
1394 
1395 	/* dev_id is gendisk->first_minor, which is `int' */
1396 	ret = kstrtoint(buf, 10, &dev_id);
1397 	if (ret)
1398 		return ret;
1399 	if (dev_id < 0)
1400 		return -EINVAL;
1401 
1402 	mutex_lock(&zram_index_mutex);
1403 
1404 	zram = idr_find(&zram_index_idr, dev_id);
1405 	if (zram) {
1406 		ret = zram_remove(zram);
1407 		if (!ret)
1408 			idr_remove(&zram_index_idr, dev_id);
1409 	} else {
1410 		ret = -ENODEV;
1411 	}
1412 
1413 	mutex_unlock(&zram_index_mutex);
1414 	return ret ? ret : count;
1415 }
1416 
1417 /*
1418  * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
1419  * sense that reading from this file does alter the state of your system -- it
1420  * creates a new un-initialized zram device and returns back this device's
1421  * device_id (or an error code if it fails to create a new device).
1422  */
1423 static struct class_attribute zram_control_class_attrs[] = {
1424 	__ATTR(hot_add, 0400, hot_add_show, NULL),
1425 	__ATTR_WO(hot_remove),
1426 	__ATTR_NULL,
1427 };
1428 
1429 static struct class zram_control_class = {
1430 	.name		= "zram-control",
1431 	.owner		= THIS_MODULE,
1432 	.class_attrs	= zram_control_class_attrs,
1433 };
1434 
1435 static int zram_remove_cb(int id, void *ptr, void *data)
1436 {
1437 	zram_remove(ptr);
1438 	return 0;
1439 }
1440 
1441 static void destroy_devices(void)
1442 {
1443 	class_unregister(&zram_control_class);
1444 	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
1445 	idr_destroy(&zram_index_idr);
1446 	unregister_blkdev(zram_major, "zram");
1447 	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1448 }
1449 
1450 static int __init zram_init(void)
1451 {
1452 	int ret;
1453 
1454 	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
1455 				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
1456 	if (ret < 0)
1457 		return ret;
1458 
1459 	ret = class_register(&zram_control_class);
1460 	if (ret) {
1461 		pr_err("Unable to register zram-control class\n");
1462 		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1463 		return ret;
1464 	}
1465 
1466 	zram_major = register_blkdev(0, "zram");
1467 	if (zram_major <= 0) {
1468 		pr_err("Unable to get major number\n");
1469 		class_unregister(&zram_control_class);
1470 		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1471 		return -EBUSY;
1472 	}
1473 
1474 	while (num_devices != 0) {
1475 		mutex_lock(&zram_index_mutex);
1476 		ret = zram_add();
1477 		mutex_unlock(&zram_index_mutex);
1478 		if (ret < 0)
1479 			goto out_error;
1480 		num_devices--;
1481 	}
1482 
1483 	return 0;
1484 
1485 out_error:
1486 	destroy_devices();
1487 	return ret;
1488 }
1489 
1490 static void __exit zram_exit(void)
1491 {
1492 	destroy_devices();
1493 }
1494 
1495 module_init(zram_init);
1496 module_exit(zram_exit);
1497 
1498 module_param(num_devices, uint, 0);
1499 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
1500 
1501 MODULE_LICENSE("Dual BSD/GPL");
1502 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1503 MODULE_DESCRIPTION("Compressed RAM Block Device");
1504