1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define KMSG_COMPONENT "zram" 16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 17 18 #include <linux/module.h> 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/bitops.h> 22 #include <linux/blkdev.h> 23 #include <linux/buffer_head.h> 24 #include <linux/device.h> 25 #include <linux/genhd.h> 26 #include <linux/highmem.h> 27 #include <linux/slab.h> 28 #include <linux/backing-dev.h> 29 #include <linux/string.h> 30 #include <linux/vmalloc.h> 31 #include <linux/err.h> 32 #include <linux/idr.h> 33 #include <linux/sysfs.h> 34 #include <linux/cpuhotplug.h> 35 36 #include "zram_drv.h" 37 38 static DEFINE_IDR(zram_index_idr); 39 /* idr index must be protected */ 40 static DEFINE_MUTEX(zram_index_mutex); 41 42 static int zram_major; 43 static const char *default_compressor = "lzo"; 44 45 /* Module params (documentation at end) */ 46 static unsigned int num_devices = 1; 47 48 static void zram_free_page(struct zram *zram, size_t index); 49 50 static inline bool init_done(struct zram *zram) 51 { 52 return zram->disksize; 53 } 54 55 static inline struct zram *dev_to_zram(struct device *dev) 56 { 57 return (struct zram *)dev_to_disk(dev)->private_data; 58 } 59 60 static unsigned long zram_get_handle(struct zram *zram, u32 index) 61 { 62 return zram->table[index].handle; 63 } 64 65 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) 66 { 67 zram->table[index].handle = handle; 68 } 69 70 /* flag operations require table entry bit_spin_lock() being held */ 71 static int zram_test_flag(struct zram *zram, u32 index, 72 enum zram_pageflags flag) 73 { 74 return zram->table[index].value & BIT(flag); 75 } 76 77 static void zram_set_flag(struct zram *zram, u32 index, 78 enum zram_pageflags flag) 79 { 80 zram->table[index].value |= BIT(flag); 81 } 82 83 static void zram_clear_flag(struct zram *zram, u32 index, 84 enum zram_pageflags flag) 85 { 86 zram->table[index].value &= ~BIT(flag); 87 } 88 89 static inline void zram_set_element(struct zram *zram, u32 index, 90 unsigned long element) 91 { 92 zram->table[index].element = element; 93 } 94 95 static unsigned long zram_get_element(struct zram *zram, u32 index) 96 { 97 return zram->table[index].element; 98 } 99 100 static size_t zram_get_obj_size(struct zram *zram, u32 index) 101 { 102 return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); 103 } 104 105 static void zram_set_obj_size(struct zram *zram, 106 u32 index, size_t size) 107 { 108 unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT; 109 110 zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; 111 } 112 113 #if PAGE_SIZE != 4096 114 static inline bool is_partial_io(struct bio_vec *bvec) 115 { 116 return bvec->bv_len != PAGE_SIZE; 117 } 118 #else 119 static inline bool is_partial_io(struct bio_vec *bvec) 120 { 121 return false; 122 } 123 #endif 124 125 /* 126 * Check if request is within bounds and aligned on zram logical blocks. 127 */ 128 static inline bool valid_io_request(struct zram *zram, 129 sector_t start, unsigned int size) 130 { 131 u64 end, bound; 132 133 /* unaligned request */ 134 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 135 return false; 136 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 137 return false; 138 139 end = start + (size >> SECTOR_SHIFT); 140 bound = zram->disksize >> SECTOR_SHIFT; 141 /* out of range range */ 142 if (unlikely(start >= bound || end > bound || start > end)) 143 return false; 144 145 /* I/O request is valid */ 146 return true; 147 } 148 149 static void update_position(u32 *index, int *offset, struct bio_vec *bvec) 150 { 151 *index += (*offset + bvec->bv_len) / PAGE_SIZE; 152 *offset = (*offset + bvec->bv_len) % PAGE_SIZE; 153 } 154 155 static inline void update_used_max(struct zram *zram, 156 const unsigned long pages) 157 { 158 unsigned long old_max, cur_max; 159 160 old_max = atomic_long_read(&zram->stats.max_used_pages); 161 162 do { 163 cur_max = old_max; 164 if (pages > cur_max) 165 old_max = atomic_long_cmpxchg( 166 &zram->stats.max_used_pages, cur_max, pages); 167 } while (old_max != cur_max); 168 } 169 170 static inline void zram_fill_page(void *ptr, unsigned long len, 171 unsigned long value) 172 { 173 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 174 memset_l(ptr, value, len / sizeof(unsigned long)); 175 } 176 177 static bool page_same_filled(void *ptr, unsigned long *element) 178 { 179 unsigned int pos; 180 unsigned long *page; 181 unsigned long val; 182 183 page = (unsigned long *)ptr; 184 val = page[0]; 185 186 for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) { 187 if (val != page[pos]) 188 return false; 189 } 190 191 *element = val; 192 193 return true; 194 } 195 196 static ssize_t initstate_show(struct device *dev, 197 struct device_attribute *attr, char *buf) 198 { 199 u32 val; 200 struct zram *zram = dev_to_zram(dev); 201 202 down_read(&zram->init_lock); 203 val = init_done(zram); 204 up_read(&zram->init_lock); 205 206 return scnprintf(buf, PAGE_SIZE, "%u\n", val); 207 } 208 209 static ssize_t disksize_show(struct device *dev, 210 struct device_attribute *attr, char *buf) 211 { 212 struct zram *zram = dev_to_zram(dev); 213 214 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); 215 } 216 217 static ssize_t mem_limit_store(struct device *dev, 218 struct device_attribute *attr, const char *buf, size_t len) 219 { 220 u64 limit; 221 char *tmp; 222 struct zram *zram = dev_to_zram(dev); 223 224 limit = memparse(buf, &tmp); 225 if (buf == tmp) /* no chars parsed, invalid input */ 226 return -EINVAL; 227 228 down_write(&zram->init_lock); 229 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 230 up_write(&zram->init_lock); 231 232 return len; 233 } 234 235 static ssize_t mem_used_max_store(struct device *dev, 236 struct device_attribute *attr, const char *buf, size_t len) 237 { 238 int err; 239 unsigned long val; 240 struct zram *zram = dev_to_zram(dev); 241 242 err = kstrtoul(buf, 10, &val); 243 if (err || val != 0) 244 return -EINVAL; 245 246 down_read(&zram->init_lock); 247 if (init_done(zram)) { 248 atomic_long_set(&zram->stats.max_used_pages, 249 zs_get_total_pages(zram->mem_pool)); 250 } 251 up_read(&zram->init_lock); 252 253 return len; 254 } 255 256 #ifdef CONFIG_ZRAM_WRITEBACK 257 static bool zram_wb_enabled(struct zram *zram) 258 { 259 return zram->backing_dev; 260 } 261 262 static void reset_bdev(struct zram *zram) 263 { 264 struct block_device *bdev; 265 266 if (!zram_wb_enabled(zram)) 267 return; 268 269 bdev = zram->bdev; 270 if (zram->old_block_size) 271 set_blocksize(bdev, zram->old_block_size); 272 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 273 /* hope filp_close flush all of IO */ 274 filp_close(zram->backing_dev, NULL); 275 zram->backing_dev = NULL; 276 zram->old_block_size = 0; 277 zram->bdev = NULL; 278 279 kvfree(zram->bitmap); 280 zram->bitmap = NULL; 281 } 282 283 static ssize_t backing_dev_show(struct device *dev, 284 struct device_attribute *attr, char *buf) 285 { 286 struct zram *zram = dev_to_zram(dev); 287 struct file *file = zram->backing_dev; 288 char *p; 289 ssize_t ret; 290 291 down_read(&zram->init_lock); 292 if (!zram_wb_enabled(zram)) { 293 memcpy(buf, "none\n", 5); 294 up_read(&zram->init_lock); 295 return 5; 296 } 297 298 p = file_path(file, buf, PAGE_SIZE - 1); 299 if (IS_ERR(p)) { 300 ret = PTR_ERR(p); 301 goto out; 302 } 303 304 ret = strlen(p); 305 memmove(buf, p, ret); 306 buf[ret++] = '\n'; 307 out: 308 up_read(&zram->init_lock); 309 return ret; 310 } 311 312 static ssize_t backing_dev_store(struct device *dev, 313 struct device_attribute *attr, const char *buf, size_t len) 314 { 315 char *file_name; 316 struct file *backing_dev = NULL; 317 struct inode *inode; 318 struct address_space *mapping; 319 unsigned int bitmap_sz, old_block_size = 0; 320 unsigned long nr_pages, *bitmap = NULL; 321 struct block_device *bdev = NULL; 322 int err; 323 struct zram *zram = dev_to_zram(dev); 324 325 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 326 if (!file_name) 327 return -ENOMEM; 328 329 down_write(&zram->init_lock); 330 if (init_done(zram)) { 331 pr_info("Can't setup backing device for initialized device\n"); 332 err = -EBUSY; 333 goto out; 334 } 335 336 strlcpy(file_name, buf, len); 337 338 backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); 339 if (IS_ERR(backing_dev)) { 340 err = PTR_ERR(backing_dev); 341 backing_dev = NULL; 342 goto out; 343 } 344 345 mapping = backing_dev->f_mapping; 346 inode = mapping->host; 347 348 /* Support only block device in this moment */ 349 if (!S_ISBLK(inode->i_mode)) { 350 err = -ENOTBLK; 351 goto out; 352 } 353 354 bdev = bdgrab(I_BDEV(inode)); 355 err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); 356 if (err < 0) 357 goto out; 358 359 nr_pages = i_size_read(inode) >> PAGE_SHIFT; 360 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 361 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 362 if (!bitmap) { 363 err = -ENOMEM; 364 goto out; 365 } 366 367 old_block_size = block_size(bdev); 368 err = set_blocksize(bdev, PAGE_SIZE); 369 if (err) 370 goto out; 371 372 reset_bdev(zram); 373 spin_lock_init(&zram->bitmap_lock); 374 375 zram->old_block_size = old_block_size; 376 zram->bdev = bdev; 377 zram->backing_dev = backing_dev; 378 zram->bitmap = bitmap; 379 zram->nr_pages = nr_pages; 380 up_write(&zram->init_lock); 381 382 pr_info("setup backing device %s\n", file_name); 383 kfree(file_name); 384 385 return len; 386 out: 387 if (bitmap) 388 kvfree(bitmap); 389 390 if (bdev) 391 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 392 393 if (backing_dev) 394 filp_close(backing_dev, NULL); 395 396 up_write(&zram->init_lock); 397 398 kfree(file_name); 399 400 return err; 401 } 402 403 static unsigned long get_entry_bdev(struct zram *zram) 404 { 405 unsigned long entry; 406 407 spin_lock(&zram->bitmap_lock); 408 /* skip 0 bit to confuse zram.handle = 0 */ 409 entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1); 410 if (entry == zram->nr_pages) { 411 spin_unlock(&zram->bitmap_lock); 412 return 0; 413 } 414 415 set_bit(entry, zram->bitmap); 416 spin_unlock(&zram->bitmap_lock); 417 418 return entry; 419 } 420 421 static void put_entry_bdev(struct zram *zram, unsigned long entry) 422 { 423 int was_set; 424 425 spin_lock(&zram->bitmap_lock); 426 was_set = test_and_clear_bit(entry, zram->bitmap); 427 spin_unlock(&zram->bitmap_lock); 428 WARN_ON_ONCE(!was_set); 429 } 430 431 static void zram_page_end_io(struct bio *bio) 432 { 433 struct page *page = bio->bi_io_vec[0].bv_page; 434 435 page_endio(page, op_is_write(bio_op(bio)), 436 blk_status_to_errno(bio->bi_status)); 437 bio_put(bio); 438 } 439 440 /* 441 * Returns 1 if the submission is successful. 442 */ 443 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, 444 unsigned long entry, struct bio *parent) 445 { 446 struct bio *bio; 447 448 bio = bio_alloc(GFP_ATOMIC, 1); 449 if (!bio) 450 return -ENOMEM; 451 452 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 453 bio_set_dev(bio, zram->bdev); 454 if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { 455 bio_put(bio); 456 return -EIO; 457 } 458 459 if (!parent) { 460 bio->bi_opf = REQ_OP_READ; 461 bio->bi_end_io = zram_page_end_io; 462 } else { 463 bio->bi_opf = parent->bi_opf; 464 bio_chain(bio, parent); 465 } 466 467 submit_bio(bio); 468 return 1; 469 } 470 471 struct zram_work { 472 struct work_struct work; 473 struct zram *zram; 474 unsigned long entry; 475 struct bio *bio; 476 }; 477 478 #if PAGE_SIZE != 4096 479 static void zram_sync_read(struct work_struct *work) 480 { 481 struct bio_vec bvec; 482 struct zram_work *zw = container_of(work, struct zram_work, work); 483 struct zram *zram = zw->zram; 484 unsigned long entry = zw->entry; 485 struct bio *bio = zw->bio; 486 487 read_from_bdev_async(zram, &bvec, entry, bio); 488 } 489 490 /* 491 * Block layer want one ->make_request_fn to be active at a time 492 * so if we use chained IO with parent IO in same context, 493 * it's a deadlock. To avoid, it, it uses worker thread context. 494 */ 495 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, 496 unsigned long entry, struct bio *bio) 497 { 498 struct zram_work work; 499 500 work.zram = zram; 501 work.entry = entry; 502 work.bio = bio; 503 504 INIT_WORK_ONSTACK(&work.work, zram_sync_read); 505 queue_work(system_unbound_wq, &work.work); 506 flush_work(&work.work); 507 destroy_work_on_stack(&work.work); 508 509 return 1; 510 } 511 #else 512 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, 513 unsigned long entry, struct bio *bio) 514 { 515 WARN_ON(1); 516 return -EIO; 517 } 518 #endif 519 520 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, 521 unsigned long entry, struct bio *parent, bool sync) 522 { 523 if (sync) 524 return read_from_bdev_sync(zram, bvec, entry, parent); 525 else 526 return read_from_bdev_async(zram, bvec, entry, parent); 527 } 528 529 static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, 530 u32 index, struct bio *parent, 531 unsigned long *pentry) 532 { 533 struct bio *bio; 534 unsigned long entry; 535 536 bio = bio_alloc(GFP_ATOMIC, 1); 537 if (!bio) 538 return -ENOMEM; 539 540 entry = get_entry_bdev(zram); 541 if (!entry) { 542 bio_put(bio); 543 return -ENOSPC; 544 } 545 546 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 547 bio_set_dev(bio, zram->bdev); 548 if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, 549 bvec->bv_offset)) { 550 bio_put(bio); 551 put_entry_bdev(zram, entry); 552 return -EIO; 553 } 554 555 if (!parent) { 556 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; 557 bio->bi_end_io = zram_page_end_io; 558 } else { 559 bio->bi_opf = parent->bi_opf; 560 bio_chain(bio, parent); 561 } 562 563 submit_bio(bio); 564 *pentry = entry; 565 566 return 0; 567 } 568 569 static void zram_wb_clear(struct zram *zram, u32 index) 570 { 571 unsigned long entry; 572 573 zram_clear_flag(zram, index, ZRAM_WB); 574 entry = zram_get_element(zram, index); 575 zram_set_element(zram, index, 0); 576 put_entry_bdev(zram, entry); 577 } 578 579 #else 580 static bool zram_wb_enabled(struct zram *zram) { return false; } 581 static inline void reset_bdev(struct zram *zram) {}; 582 static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, 583 u32 index, struct bio *parent, 584 unsigned long *pentry) 585 586 { 587 return -EIO; 588 } 589 590 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, 591 unsigned long entry, struct bio *parent, bool sync) 592 { 593 return -EIO; 594 } 595 static void zram_wb_clear(struct zram *zram, u32 index) {} 596 #endif 597 598 599 /* 600 * We switched to per-cpu streams and this attr is not needed anymore. 601 * However, we will keep it around for some time, because: 602 * a) we may revert per-cpu streams in the future 603 * b) it's visible to user space and we need to follow our 2 years 604 * retirement rule; but we already have a number of 'soon to be 605 * altered' attrs, so max_comp_streams need to wait for the next 606 * layoff cycle. 607 */ 608 static ssize_t max_comp_streams_show(struct device *dev, 609 struct device_attribute *attr, char *buf) 610 { 611 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); 612 } 613 614 static ssize_t max_comp_streams_store(struct device *dev, 615 struct device_attribute *attr, const char *buf, size_t len) 616 { 617 return len; 618 } 619 620 static ssize_t comp_algorithm_show(struct device *dev, 621 struct device_attribute *attr, char *buf) 622 { 623 size_t sz; 624 struct zram *zram = dev_to_zram(dev); 625 626 down_read(&zram->init_lock); 627 sz = zcomp_available_show(zram->compressor, buf); 628 up_read(&zram->init_lock); 629 630 return sz; 631 } 632 633 static ssize_t comp_algorithm_store(struct device *dev, 634 struct device_attribute *attr, const char *buf, size_t len) 635 { 636 struct zram *zram = dev_to_zram(dev); 637 char compressor[ARRAY_SIZE(zram->compressor)]; 638 size_t sz; 639 640 strlcpy(compressor, buf, sizeof(compressor)); 641 /* ignore trailing newline */ 642 sz = strlen(compressor); 643 if (sz > 0 && compressor[sz - 1] == '\n') 644 compressor[sz - 1] = 0x00; 645 646 if (!zcomp_available_algorithm(compressor)) 647 return -EINVAL; 648 649 down_write(&zram->init_lock); 650 if (init_done(zram)) { 651 up_write(&zram->init_lock); 652 pr_info("Can't change algorithm for initialized device\n"); 653 return -EBUSY; 654 } 655 656 strcpy(zram->compressor, compressor); 657 up_write(&zram->init_lock); 658 return len; 659 } 660 661 static ssize_t compact_store(struct device *dev, 662 struct device_attribute *attr, const char *buf, size_t len) 663 { 664 struct zram *zram = dev_to_zram(dev); 665 666 down_read(&zram->init_lock); 667 if (!init_done(zram)) { 668 up_read(&zram->init_lock); 669 return -EINVAL; 670 } 671 672 zs_compact(zram->mem_pool); 673 up_read(&zram->init_lock); 674 675 return len; 676 } 677 678 static ssize_t io_stat_show(struct device *dev, 679 struct device_attribute *attr, char *buf) 680 { 681 struct zram *zram = dev_to_zram(dev); 682 ssize_t ret; 683 684 down_read(&zram->init_lock); 685 ret = scnprintf(buf, PAGE_SIZE, 686 "%8llu %8llu %8llu %8llu\n", 687 (u64)atomic64_read(&zram->stats.failed_reads), 688 (u64)atomic64_read(&zram->stats.failed_writes), 689 (u64)atomic64_read(&zram->stats.invalid_io), 690 (u64)atomic64_read(&zram->stats.notify_free)); 691 up_read(&zram->init_lock); 692 693 return ret; 694 } 695 696 static ssize_t mm_stat_show(struct device *dev, 697 struct device_attribute *attr, char *buf) 698 { 699 struct zram *zram = dev_to_zram(dev); 700 struct zs_pool_stats pool_stats; 701 u64 orig_size, mem_used = 0; 702 long max_used; 703 ssize_t ret; 704 705 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 706 707 down_read(&zram->init_lock); 708 if (init_done(zram)) { 709 mem_used = zs_get_total_pages(zram->mem_pool); 710 zs_pool_stats(zram->mem_pool, &pool_stats); 711 } 712 713 orig_size = atomic64_read(&zram->stats.pages_stored); 714 max_used = atomic_long_read(&zram->stats.max_used_pages); 715 716 ret = scnprintf(buf, PAGE_SIZE, 717 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n", 718 orig_size << PAGE_SHIFT, 719 (u64)atomic64_read(&zram->stats.compr_data_size), 720 mem_used << PAGE_SHIFT, 721 zram->limit_pages << PAGE_SHIFT, 722 max_used << PAGE_SHIFT, 723 (u64)atomic64_read(&zram->stats.same_pages), 724 pool_stats.pages_compacted); 725 up_read(&zram->init_lock); 726 727 return ret; 728 } 729 730 static ssize_t debug_stat_show(struct device *dev, 731 struct device_attribute *attr, char *buf) 732 { 733 int version = 1; 734 struct zram *zram = dev_to_zram(dev); 735 ssize_t ret; 736 737 down_read(&zram->init_lock); 738 ret = scnprintf(buf, PAGE_SIZE, 739 "version: %d\n%8llu\n", 740 version, 741 (u64)atomic64_read(&zram->stats.writestall)); 742 up_read(&zram->init_lock); 743 744 return ret; 745 } 746 747 static DEVICE_ATTR_RO(io_stat); 748 static DEVICE_ATTR_RO(mm_stat); 749 static DEVICE_ATTR_RO(debug_stat); 750 751 static void zram_slot_lock(struct zram *zram, u32 index) 752 { 753 bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value); 754 } 755 756 static void zram_slot_unlock(struct zram *zram, u32 index) 757 { 758 bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); 759 } 760 761 static void zram_meta_free(struct zram *zram, u64 disksize) 762 { 763 size_t num_pages = disksize >> PAGE_SHIFT; 764 size_t index; 765 766 /* Free all pages that are still in this zram device */ 767 for (index = 0; index < num_pages; index++) 768 zram_free_page(zram, index); 769 770 zs_destroy_pool(zram->mem_pool); 771 vfree(zram->table); 772 } 773 774 static bool zram_meta_alloc(struct zram *zram, u64 disksize) 775 { 776 size_t num_pages; 777 778 num_pages = disksize >> PAGE_SHIFT; 779 zram->table = vzalloc(num_pages * sizeof(*zram->table)); 780 if (!zram->table) 781 return false; 782 783 zram->mem_pool = zs_create_pool(zram->disk->disk_name); 784 if (!zram->mem_pool) { 785 vfree(zram->table); 786 return false; 787 } 788 789 return true; 790 } 791 792 /* 793 * To protect concurrent access to the same index entry, 794 * caller should hold this table index entry's bit_spinlock to 795 * indicate this index entry is accessing. 796 */ 797 static void zram_free_page(struct zram *zram, size_t index) 798 { 799 unsigned long handle; 800 801 if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) { 802 zram_wb_clear(zram, index); 803 atomic64_dec(&zram->stats.pages_stored); 804 return; 805 } 806 807 /* 808 * No memory is allocated for same element filled pages. 809 * Simply clear same page flag. 810 */ 811 if (zram_test_flag(zram, index, ZRAM_SAME)) { 812 zram_clear_flag(zram, index, ZRAM_SAME); 813 zram_set_element(zram, index, 0); 814 atomic64_dec(&zram->stats.same_pages); 815 atomic64_dec(&zram->stats.pages_stored); 816 return; 817 } 818 819 handle = zram_get_handle(zram, index); 820 if (!handle) 821 return; 822 823 zs_free(zram->mem_pool, handle); 824 825 atomic64_sub(zram_get_obj_size(zram, index), 826 &zram->stats.compr_data_size); 827 atomic64_dec(&zram->stats.pages_stored); 828 829 zram_set_handle(zram, index, 0); 830 zram_set_obj_size(zram, index, 0); 831 } 832 833 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, 834 struct bio *bio, bool partial_io) 835 { 836 int ret; 837 unsigned long handle; 838 unsigned int size; 839 void *src, *dst; 840 841 if (zram_wb_enabled(zram)) { 842 zram_slot_lock(zram, index); 843 if (zram_test_flag(zram, index, ZRAM_WB)) { 844 struct bio_vec bvec; 845 846 zram_slot_unlock(zram, index); 847 848 bvec.bv_page = page; 849 bvec.bv_len = PAGE_SIZE; 850 bvec.bv_offset = 0; 851 return read_from_bdev(zram, &bvec, 852 zram_get_element(zram, index), 853 bio, partial_io); 854 } 855 zram_slot_unlock(zram, index); 856 } 857 858 zram_slot_lock(zram, index); 859 handle = zram_get_handle(zram, index); 860 if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { 861 unsigned long value; 862 void *mem; 863 864 value = handle ? zram_get_element(zram, index) : 0; 865 mem = kmap_atomic(page); 866 zram_fill_page(mem, PAGE_SIZE, value); 867 kunmap_atomic(mem); 868 zram_slot_unlock(zram, index); 869 return 0; 870 } 871 872 size = zram_get_obj_size(zram, index); 873 874 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); 875 if (size == PAGE_SIZE) { 876 dst = kmap_atomic(page); 877 memcpy(dst, src, PAGE_SIZE); 878 kunmap_atomic(dst); 879 ret = 0; 880 } else { 881 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); 882 883 dst = kmap_atomic(page); 884 ret = zcomp_decompress(zstrm, src, size, dst); 885 kunmap_atomic(dst); 886 zcomp_stream_put(zram->comp); 887 } 888 zs_unmap_object(zram->mem_pool, handle); 889 zram_slot_unlock(zram, index); 890 891 /* Should NEVER happen. Return bio error if it does. */ 892 if (unlikely(ret)) 893 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 894 895 return ret; 896 } 897 898 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 899 u32 index, int offset, struct bio *bio) 900 { 901 int ret; 902 struct page *page; 903 904 page = bvec->bv_page; 905 if (is_partial_io(bvec)) { 906 /* Use a temporary buffer to decompress the page */ 907 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); 908 if (!page) 909 return -ENOMEM; 910 } 911 912 ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); 913 if (unlikely(ret)) 914 goto out; 915 916 if (is_partial_io(bvec)) { 917 void *dst = kmap_atomic(bvec->bv_page); 918 void *src = kmap_atomic(page); 919 920 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len); 921 kunmap_atomic(src); 922 kunmap_atomic(dst); 923 } 924 out: 925 if (is_partial_io(bvec)) 926 __free_page(page); 927 928 return ret; 929 } 930 931 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 932 u32 index, struct bio *bio) 933 { 934 int ret = 0; 935 unsigned long alloced_pages; 936 unsigned long handle = 0; 937 unsigned int comp_len = 0; 938 void *src, *dst, *mem; 939 struct zcomp_strm *zstrm; 940 struct page *page = bvec->bv_page; 941 unsigned long element = 0; 942 enum zram_pageflags flags = 0; 943 bool allow_wb = true; 944 945 mem = kmap_atomic(page); 946 if (page_same_filled(mem, &element)) { 947 kunmap_atomic(mem); 948 /* Free memory associated with this sector now. */ 949 flags = ZRAM_SAME; 950 atomic64_inc(&zram->stats.same_pages); 951 goto out; 952 } 953 kunmap_atomic(mem); 954 955 compress_again: 956 zstrm = zcomp_stream_get(zram->comp); 957 src = kmap_atomic(page); 958 ret = zcomp_compress(zstrm, src, &comp_len); 959 kunmap_atomic(src); 960 961 if (unlikely(ret)) { 962 zcomp_stream_put(zram->comp); 963 pr_err("Compression failed! err=%d\n", ret); 964 zs_free(zram->mem_pool, handle); 965 return ret; 966 } 967 968 if (unlikely(comp_len > max_zpage_size)) { 969 if (zram_wb_enabled(zram) && allow_wb) { 970 zcomp_stream_put(zram->comp); 971 ret = write_to_bdev(zram, bvec, index, bio, &element); 972 if (!ret) { 973 flags = ZRAM_WB; 974 ret = 1; 975 goto out; 976 } 977 allow_wb = false; 978 goto compress_again; 979 } 980 comp_len = PAGE_SIZE; 981 } 982 983 /* 984 * handle allocation has 2 paths: 985 * a) fast path is executed with preemption disabled (for 986 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, 987 * since we can't sleep; 988 * b) slow path enables preemption and attempts to allocate 989 * the page with __GFP_DIRECT_RECLAIM bit set. we have to 990 * put per-cpu compression stream and, thus, to re-do 991 * the compression once handle is allocated. 992 * 993 * if we have a 'non-null' handle here then we are coming 994 * from the slow path and handle has already been allocated. 995 */ 996 if (!handle) 997 handle = zs_malloc(zram->mem_pool, comp_len, 998 __GFP_KSWAPD_RECLAIM | 999 __GFP_NOWARN | 1000 __GFP_HIGHMEM | 1001 __GFP_MOVABLE); 1002 if (!handle) { 1003 zcomp_stream_put(zram->comp); 1004 atomic64_inc(&zram->stats.writestall); 1005 handle = zs_malloc(zram->mem_pool, comp_len, 1006 GFP_NOIO | __GFP_HIGHMEM | 1007 __GFP_MOVABLE); 1008 if (handle) 1009 goto compress_again; 1010 return -ENOMEM; 1011 } 1012 1013 alloced_pages = zs_get_total_pages(zram->mem_pool); 1014 update_used_max(zram, alloced_pages); 1015 1016 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 1017 zcomp_stream_put(zram->comp); 1018 zs_free(zram->mem_pool, handle); 1019 return -ENOMEM; 1020 } 1021 1022 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); 1023 1024 src = zstrm->buffer; 1025 if (comp_len == PAGE_SIZE) 1026 src = kmap_atomic(page); 1027 memcpy(dst, src, comp_len); 1028 if (comp_len == PAGE_SIZE) 1029 kunmap_atomic(src); 1030 1031 zcomp_stream_put(zram->comp); 1032 zs_unmap_object(zram->mem_pool, handle); 1033 atomic64_add(comp_len, &zram->stats.compr_data_size); 1034 out: 1035 /* 1036 * Free memory associated with this sector 1037 * before overwriting unused sectors. 1038 */ 1039 zram_slot_lock(zram, index); 1040 zram_free_page(zram, index); 1041 1042 if (flags) { 1043 zram_set_flag(zram, index, flags); 1044 zram_set_element(zram, index, element); 1045 } else { 1046 zram_set_handle(zram, index, handle); 1047 zram_set_obj_size(zram, index, comp_len); 1048 } 1049 zram_slot_unlock(zram, index); 1050 1051 /* Update stats */ 1052 atomic64_inc(&zram->stats.pages_stored); 1053 return ret; 1054 } 1055 1056 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 1057 u32 index, int offset, struct bio *bio) 1058 { 1059 int ret; 1060 struct page *page = NULL; 1061 void *src; 1062 struct bio_vec vec; 1063 1064 vec = *bvec; 1065 if (is_partial_io(bvec)) { 1066 void *dst; 1067 /* 1068 * This is a partial IO. We need to read the full page 1069 * before to write the changes. 1070 */ 1071 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); 1072 if (!page) 1073 return -ENOMEM; 1074 1075 ret = __zram_bvec_read(zram, page, index, bio, true); 1076 if (ret) 1077 goto out; 1078 1079 src = kmap_atomic(bvec->bv_page); 1080 dst = kmap_atomic(page); 1081 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len); 1082 kunmap_atomic(dst); 1083 kunmap_atomic(src); 1084 1085 vec.bv_page = page; 1086 vec.bv_len = PAGE_SIZE; 1087 vec.bv_offset = 0; 1088 } 1089 1090 ret = __zram_bvec_write(zram, &vec, index, bio); 1091 out: 1092 if (is_partial_io(bvec)) 1093 __free_page(page); 1094 return ret; 1095 } 1096 1097 /* 1098 * zram_bio_discard - handler on discard request 1099 * @index: physical block index in PAGE_SIZE units 1100 * @offset: byte offset within physical block 1101 */ 1102 static void zram_bio_discard(struct zram *zram, u32 index, 1103 int offset, struct bio *bio) 1104 { 1105 size_t n = bio->bi_iter.bi_size; 1106 1107 /* 1108 * zram manages data in physical block size units. Because logical block 1109 * size isn't identical with physical block size on some arch, we 1110 * could get a discard request pointing to a specific offset within a 1111 * certain physical block. Although we can handle this request by 1112 * reading that physiclal block and decompressing and partially zeroing 1113 * and re-compressing and then re-storing it, this isn't reasonable 1114 * because our intent with a discard request is to save memory. So 1115 * skipping this logical block is appropriate here. 1116 */ 1117 if (offset) { 1118 if (n <= (PAGE_SIZE - offset)) 1119 return; 1120 1121 n -= (PAGE_SIZE - offset); 1122 index++; 1123 } 1124 1125 while (n >= PAGE_SIZE) { 1126 zram_slot_lock(zram, index); 1127 zram_free_page(zram, index); 1128 zram_slot_unlock(zram, index); 1129 atomic64_inc(&zram->stats.notify_free); 1130 index++; 1131 n -= PAGE_SIZE; 1132 } 1133 } 1134 1135 /* 1136 * Returns errno if it has some problem. Otherwise return 0 or 1. 1137 * Returns 0 if IO request was done synchronously 1138 * Returns 1 if IO request was successfully submitted. 1139 */ 1140 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 1141 int offset, bool is_write, struct bio *bio) 1142 { 1143 unsigned long start_time = jiffies; 1144 int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ; 1145 struct request_queue *q = zram->disk->queue; 1146 int ret; 1147 1148 generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT, 1149 &zram->disk->part0); 1150 1151 if (!is_write) { 1152 atomic64_inc(&zram->stats.num_reads); 1153 ret = zram_bvec_read(zram, bvec, index, offset, bio); 1154 flush_dcache_page(bvec->bv_page); 1155 } else { 1156 atomic64_inc(&zram->stats.num_writes); 1157 ret = zram_bvec_write(zram, bvec, index, offset, bio); 1158 } 1159 1160 generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time); 1161 1162 if (unlikely(ret < 0)) { 1163 if (!is_write) 1164 atomic64_inc(&zram->stats.failed_reads); 1165 else 1166 atomic64_inc(&zram->stats.failed_writes); 1167 } 1168 1169 return ret; 1170 } 1171 1172 static void __zram_make_request(struct zram *zram, struct bio *bio) 1173 { 1174 int offset; 1175 u32 index; 1176 struct bio_vec bvec; 1177 struct bvec_iter iter; 1178 1179 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 1180 offset = (bio->bi_iter.bi_sector & 1181 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 1182 1183 switch (bio_op(bio)) { 1184 case REQ_OP_DISCARD: 1185 case REQ_OP_WRITE_ZEROES: 1186 zram_bio_discard(zram, index, offset, bio); 1187 bio_endio(bio); 1188 return; 1189 default: 1190 break; 1191 } 1192 1193 bio_for_each_segment(bvec, bio, iter) { 1194 struct bio_vec bv = bvec; 1195 unsigned int unwritten = bvec.bv_len; 1196 1197 do { 1198 bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, 1199 unwritten); 1200 if (zram_bvec_rw(zram, &bv, index, offset, 1201 op_is_write(bio_op(bio)), bio) < 0) 1202 goto out; 1203 1204 bv.bv_offset += bv.bv_len; 1205 unwritten -= bv.bv_len; 1206 1207 update_position(&index, &offset, &bv); 1208 } while (unwritten); 1209 } 1210 1211 bio_endio(bio); 1212 return; 1213 1214 out: 1215 bio_io_error(bio); 1216 } 1217 1218 /* 1219 * Handler function for all zram I/O requests. 1220 */ 1221 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) 1222 { 1223 struct zram *zram = queue->queuedata; 1224 1225 if (!valid_io_request(zram, bio->bi_iter.bi_sector, 1226 bio->bi_iter.bi_size)) { 1227 atomic64_inc(&zram->stats.invalid_io); 1228 goto error; 1229 } 1230 1231 __zram_make_request(zram, bio); 1232 return BLK_QC_T_NONE; 1233 1234 error: 1235 bio_io_error(bio); 1236 return BLK_QC_T_NONE; 1237 } 1238 1239 static void zram_slot_free_notify(struct block_device *bdev, 1240 unsigned long index) 1241 { 1242 struct zram *zram; 1243 1244 zram = bdev->bd_disk->private_data; 1245 1246 zram_slot_lock(zram, index); 1247 zram_free_page(zram, index); 1248 zram_slot_unlock(zram, index); 1249 atomic64_inc(&zram->stats.notify_free); 1250 } 1251 1252 static int zram_rw_page(struct block_device *bdev, sector_t sector, 1253 struct page *page, bool is_write) 1254 { 1255 int offset, ret; 1256 u32 index; 1257 struct zram *zram; 1258 struct bio_vec bv; 1259 1260 if (PageTransHuge(page)) 1261 return -ENOTSUPP; 1262 zram = bdev->bd_disk->private_data; 1263 1264 if (!valid_io_request(zram, sector, PAGE_SIZE)) { 1265 atomic64_inc(&zram->stats.invalid_io); 1266 ret = -EINVAL; 1267 goto out; 1268 } 1269 1270 index = sector >> SECTORS_PER_PAGE_SHIFT; 1271 offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 1272 1273 bv.bv_page = page; 1274 bv.bv_len = PAGE_SIZE; 1275 bv.bv_offset = 0; 1276 1277 ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL); 1278 out: 1279 /* 1280 * If I/O fails, just return error(ie, non-zero) without 1281 * calling page_endio. 1282 * It causes resubmit the I/O with bio request by upper functions 1283 * of rw_page(e.g., swap_readpage, __swap_writepage) and 1284 * bio->bi_end_io does things to handle the error 1285 * (e.g., SetPageError, set_page_dirty and extra works). 1286 */ 1287 if (unlikely(ret < 0)) 1288 return ret; 1289 1290 switch (ret) { 1291 case 0: 1292 page_endio(page, is_write, 0); 1293 break; 1294 case 1: 1295 ret = 0; 1296 break; 1297 default: 1298 WARN_ON(1); 1299 } 1300 return ret; 1301 } 1302 1303 static void zram_reset_device(struct zram *zram) 1304 { 1305 struct zcomp *comp; 1306 u64 disksize; 1307 1308 down_write(&zram->init_lock); 1309 1310 zram->limit_pages = 0; 1311 1312 if (!init_done(zram)) { 1313 up_write(&zram->init_lock); 1314 return; 1315 } 1316 1317 comp = zram->comp; 1318 disksize = zram->disksize; 1319 zram->disksize = 0; 1320 1321 set_capacity(zram->disk, 0); 1322 part_stat_set_all(&zram->disk->part0, 0); 1323 1324 up_write(&zram->init_lock); 1325 /* I/O operation under all of CPU are done so let's free */ 1326 zram_meta_free(zram, disksize); 1327 memset(&zram->stats, 0, sizeof(zram->stats)); 1328 zcomp_destroy(comp); 1329 reset_bdev(zram); 1330 } 1331 1332 static ssize_t disksize_store(struct device *dev, 1333 struct device_attribute *attr, const char *buf, size_t len) 1334 { 1335 u64 disksize; 1336 struct zcomp *comp; 1337 struct zram *zram = dev_to_zram(dev); 1338 int err; 1339 1340 disksize = memparse(buf, NULL); 1341 if (!disksize) 1342 return -EINVAL; 1343 1344 down_write(&zram->init_lock); 1345 if (init_done(zram)) { 1346 pr_info("Cannot change disksize for initialized device\n"); 1347 err = -EBUSY; 1348 goto out_unlock; 1349 } 1350 1351 disksize = PAGE_ALIGN(disksize); 1352 if (!zram_meta_alloc(zram, disksize)) { 1353 err = -ENOMEM; 1354 goto out_unlock; 1355 } 1356 1357 comp = zcomp_create(zram->compressor); 1358 if (IS_ERR(comp)) { 1359 pr_err("Cannot initialise %s compressing backend\n", 1360 zram->compressor); 1361 err = PTR_ERR(comp); 1362 goto out_free_meta; 1363 } 1364 1365 zram->comp = comp; 1366 zram->disksize = disksize; 1367 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); 1368 1369 revalidate_disk(zram->disk); 1370 up_write(&zram->init_lock); 1371 1372 return len; 1373 1374 out_free_meta: 1375 zram_meta_free(zram, disksize); 1376 out_unlock: 1377 up_write(&zram->init_lock); 1378 return err; 1379 } 1380 1381 static ssize_t reset_store(struct device *dev, 1382 struct device_attribute *attr, const char *buf, size_t len) 1383 { 1384 int ret; 1385 unsigned short do_reset; 1386 struct zram *zram; 1387 struct block_device *bdev; 1388 1389 ret = kstrtou16(buf, 10, &do_reset); 1390 if (ret) 1391 return ret; 1392 1393 if (!do_reset) 1394 return -EINVAL; 1395 1396 zram = dev_to_zram(dev); 1397 bdev = bdget_disk(zram->disk, 0); 1398 if (!bdev) 1399 return -ENOMEM; 1400 1401 mutex_lock(&bdev->bd_mutex); 1402 /* Do not reset an active device or claimed device */ 1403 if (bdev->bd_openers || zram->claim) { 1404 mutex_unlock(&bdev->bd_mutex); 1405 bdput(bdev); 1406 return -EBUSY; 1407 } 1408 1409 /* From now on, anyone can't open /dev/zram[0-9] */ 1410 zram->claim = true; 1411 mutex_unlock(&bdev->bd_mutex); 1412 1413 /* Make sure all the pending I/O are finished */ 1414 fsync_bdev(bdev); 1415 zram_reset_device(zram); 1416 revalidate_disk(zram->disk); 1417 bdput(bdev); 1418 1419 mutex_lock(&bdev->bd_mutex); 1420 zram->claim = false; 1421 mutex_unlock(&bdev->bd_mutex); 1422 1423 return len; 1424 } 1425 1426 static int zram_open(struct block_device *bdev, fmode_t mode) 1427 { 1428 int ret = 0; 1429 struct zram *zram; 1430 1431 WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); 1432 1433 zram = bdev->bd_disk->private_data; 1434 /* zram was claimed to reset so open request fails */ 1435 if (zram->claim) 1436 ret = -EBUSY; 1437 1438 return ret; 1439 } 1440 1441 static const struct block_device_operations zram_devops = { 1442 .open = zram_open, 1443 .swap_slot_free_notify = zram_slot_free_notify, 1444 .rw_page = zram_rw_page, 1445 .owner = THIS_MODULE 1446 }; 1447 1448 static DEVICE_ATTR_WO(compact); 1449 static DEVICE_ATTR_RW(disksize); 1450 static DEVICE_ATTR_RO(initstate); 1451 static DEVICE_ATTR_WO(reset); 1452 static DEVICE_ATTR_WO(mem_limit); 1453 static DEVICE_ATTR_WO(mem_used_max); 1454 static DEVICE_ATTR_RW(max_comp_streams); 1455 static DEVICE_ATTR_RW(comp_algorithm); 1456 #ifdef CONFIG_ZRAM_WRITEBACK 1457 static DEVICE_ATTR_RW(backing_dev); 1458 #endif 1459 1460 static struct attribute *zram_disk_attrs[] = { 1461 &dev_attr_disksize.attr, 1462 &dev_attr_initstate.attr, 1463 &dev_attr_reset.attr, 1464 &dev_attr_compact.attr, 1465 &dev_attr_mem_limit.attr, 1466 &dev_attr_mem_used_max.attr, 1467 &dev_attr_max_comp_streams.attr, 1468 &dev_attr_comp_algorithm.attr, 1469 #ifdef CONFIG_ZRAM_WRITEBACK 1470 &dev_attr_backing_dev.attr, 1471 #endif 1472 &dev_attr_io_stat.attr, 1473 &dev_attr_mm_stat.attr, 1474 &dev_attr_debug_stat.attr, 1475 NULL, 1476 }; 1477 1478 static const struct attribute_group zram_disk_attr_group = { 1479 .attrs = zram_disk_attrs, 1480 }; 1481 1482 /* 1483 * Allocate and initialize new zram device. the function returns 1484 * '>= 0' device_id upon success, and negative value otherwise. 1485 */ 1486 static int zram_add(void) 1487 { 1488 struct zram *zram; 1489 struct request_queue *queue; 1490 int ret, device_id; 1491 1492 zram = kzalloc(sizeof(struct zram), GFP_KERNEL); 1493 if (!zram) 1494 return -ENOMEM; 1495 1496 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 1497 if (ret < 0) 1498 goto out_free_dev; 1499 device_id = ret; 1500 1501 init_rwsem(&zram->init_lock); 1502 1503 queue = blk_alloc_queue(GFP_KERNEL); 1504 if (!queue) { 1505 pr_err("Error allocating disk queue for device %d\n", 1506 device_id); 1507 ret = -ENOMEM; 1508 goto out_free_idr; 1509 } 1510 1511 blk_queue_make_request(queue, zram_make_request); 1512 1513 /* gendisk structure */ 1514 zram->disk = alloc_disk(1); 1515 if (!zram->disk) { 1516 pr_err("Error allocating disk structure for device %d\n", 1517 device_id); 1518 ret = -ENOMEM; 1519 goto out_free_queue; 1520 } 1521 1522 zram->disk->major = zram_major; 1523 zram->disk->first_minor = device_id; 1524 zram->disk->fops = &zram_devops; 1525 zram->disk->queue = queue; 1526 zram->disk->queue->queuedata = zram; 1527 zram->disk->private_data = zram; 1528 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 1529 1530 /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ 1531 set_capacity(zram->disk, 0); 1532 /* zram devices sort of resembles non-rotational disks */ 1533 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); 1534 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); 1535 1536 /* 1537 * To ensure that we always get PAGE_SIZE aligned 1538 * and n*PAGE_SIZED sized I/O requests. 1539 */ 1540 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); 1541 blk_queue_logical_block_size(zram->disk->queue, 1542 ZRAM_LOGICAL_BLOCK_SIZE); 1543 blk_queue_io_min(zram->disk->queue, PAGE_SIZE); 1544 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); 1545 zram->disk->queue->limits.discard_granularity = PAGE_SIZE; 1546 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); 1547 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); 1548 1549 /* 1550 * zram_bio_discard() will clear all logical blocks if logical block 1551 * size is identical with physical block size(PAGE_SIZE). But if it is 1552 * different, we will skip discarding some parts of logical blocks in 1553 * the part of the request range which isn't aligned to physical block 1554 * size. So we can't ensure that all discarded logical blocks are 1555 * zeroed. 1556 */ 1557 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) 1558 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); 1559 1560 zram->disk->queue->backing_dev_info->capabilities |= 1561 (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); 1562 add_disk(zram->disk); 1563 1564 ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, 1565 &zram_disk_attr_group); 1566 if (ret < 0) { 1567 pr_err("Error creating sysfs group for device %d\n", 1568 device_id); 1569 goto out_free_disk; 1570 } 1571 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); 1572 1573 pr_info("Added device: %s\n", zram->disk->disk_name); 1574 return device_id; 1575 1576 out_free_disk: 1577 del_gendisk(zram->disk); 1578 put_disk(zram->disk); 1579 out_free_queue: 1580 blk_cleanup_queue(queue); 1581 out_free_idr: 1582 idr_remove(&zram_index_idr, device_id); 1583 out_free_dev: 1584 kfree(zram); 1585 return ret; 1586 } 1587 1588 static int zram_remove(struct zram *zram) 1589 { 1590 struct block_device *bdev; 1591 1592 bdev = bdget_disk(zram->disk, 0); 1593 if (!bdev) 1594 return -ENOMEM; 1595 1596 mutex_lock(&bdev->bd_mutex); 1597 if (bdev->bd_openers || zram->claim) { 1598 mutex_unlock(&bdev->bd_mutex); 1599 bdput(bdev); 1600 return -EBUSY; 1601 } 1602 1603 zram->claim = true; 1604 mutex_unlock(&bdev->bd_mutex); 1605 1606 /* 1607 * Remove sysfs first, so no one will perform a disksize 1608 * store while we destroy the devices. This also helps during 1609 * hot_remove -- zram_reset_device() is the last holder of 1610 * ->init_lock, no later/concurrent disksize_store() or any 1611 * other sysfs handlers are possible. 1612 */ 1613 sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, 1614 &zram_disk_attr_group); 1615 1616 /* Make sure all the pending I/O are finished */ 1617 fsync_bdev(bdev); 1618 zram_reset_device(zram); 1619 bdput(bdev); 1620 1621 pr_info("Removed device: %s\n", zram->disk->disk_name); 1622 1623 blk_cleanup_queue(zram->disk->queue); 1624 del_gendisk(zram->disk); 1625 put_disk(zram->disk); 1626 kfree(zram); 1627 return 0; 1628 } 1629 1630 /* zram-control sysfs attributes */ 1631 1632 /* 1633 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a 1634 * sense that reading from this file does alter the state of your system -- it 1635 * creates a new un-initialized zram device and returns back this device's 1636 * device_id (or an error code if it fails to create a new device). 1637 */ 1638 static ssize_t hot_add_show(struct class *class, 1639 struct class_attribute *attr, 1640 char *buf) 1641 { 1642 int ret; 1643 1644 mutex_lock(&zram_index_mutex); 1645 ret = zram_add(); 1646 mutex_unlock(&zram_index_mutex); 1647 1648 if (ret < 0) 1649 return ret; 1650 return scnprintf(buf, PAGE_SIZE, "%d\n", ret); 1651 } 1652 static CLASS_ATTR_RO(hot_add); 1653 1654 static ssize_t hot_remove_store(struct class *class, 1655 struct class_attribute *attr, 1656 const char *buf, 1657 size_t count) 1658 { 1659 struct zram *zram; 1660 int ret, dev_id; 1661 1662 /* dev_id is gendisk->first_minor, which is `int' */ 1663 ret = kstrtoint(buf, 10, &dev_id); 1664 if (ret) 1665 return ret; 1666 if (dev_id < 0) 1667 return -EINVAL; 1668 1669 mutex_lock(&zram_index_mutex); 1670 1671 zram = idr_find(&zram_index_idr, dev_id); 1672 if (zram) { 1673 ret = zram_remove(zram); 1674 if (!ret) 1675 idr_remove(&zram_index_idr, dev_id); 1676 } else { 1677 ret = -ENODEV; 1678 } 1679 1680 mutex_unlock(&zram_index_mutex); 1681 return ret ? ret : count; 1682 } 1683 static CLASS_ATTR_WO(hot_remove); 1684 1685 static struct attribute *zram_control_class_attrs[] = { 1686 &class_attr_hot_add.attr, 1687 &class_attr_hot_remove.attr, 1688 NULL, 1689 }; 1690 ATTRIBUTE_GROUPS(zram_control_class); 1691 1692 static struct class zram_control_class = { 1693 .name = "zram-control", 1694 .owner = THIS_MODULE, 1695 .class_groups = zram_control_class_groups, 1696 }; 1697 1698 static int zram_remove_cb(int id, void *ptr, void *data) 1699 { 1700 zram_remove(ptr); 1701 return 0; 1702 } 1703 1704 static void destroy_devices(void) 1705 { 1706 class_unregister(&zram_control_class); 1707 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 1708 idr_destroy(&zram_index_idr); 1709 unregister_blkdev(zram_major, "zram"); 1710 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 1711 } 1712 1713 static int __init zram_init(void) 1714 { 1715 int ret; 1716 1717 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 1718 zcomp_cpu_up_prepare, zcomp_cpu_dead); 1719 if (ret < 0) 1720 return ret; 1721 1722 ret = class_register(&zram_control_class); 1723 if (ret) { 1724 pr_err("Unable to register zram-control class\n"); 1725 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 1726 return ret; 1727 } 1728 1729 zram_major = register_blkdev(0, "zram"); 1730 if (zram_major <= 0) { 1731 pr_err("Unable to get major number\n"); 1732 class_unregister(&zram_control_class); 1733 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 1734 return -EBUSY; 1735 } 1736 1737 while (num_devices != 0) { 1738 mutex_lock(&zram_index_mutex); 1739 ret = zram_add(); 1740 mutex_unlock(&zram_index_mutex); 1741 if (ret < 0) 1742 goto out_error; 1743 num_devices--; 1744 } 1745 1746 return 0; 1747 1748 out_error: 1749 destroy_devices(); 1750 return ret; 1751 } 1752 1753 static void __exit zram_exit(void) 1754 { 1755 destroy_devices(); 1756 } 1757 1758 module_init(zram_init); 1759 module_exit(zram_exit); 1760 1761 module_param(num_devices, uint, 0); 1762 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 1763 1764 MODULE_LICENSE("Dual BSD/GPL"); 1765 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 1766 MODULE_DESCRIPTION("Compressed RAM Block Device"); 1767