/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/cpuhotplug.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static void zram_free_page(struct zram *zram, size_t index);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;

	zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

static void zram_revalidate_disk(struct zram *zram)
{
	revalidate_disk(zram->disk);
	/* revalidate_disk() resets BDI_CAP_STABLE_WRITES, so set it again */
	zram->disk->queue->backing_dev_info->capabilities |=
		BDI_CAP_STABLE_WRITES;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

static inline void zram_fill_page(char *ptr, unsigned long len,
					unsigned long value)
{
	int i;
	unsigned long *page = (unsigned long *)ptr;

	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));

	if (likely(value == 0)) {
		memset(ptr, 0, len);
	} else {
		for (i = 0; i < len / sizeof(*page); i++)
			page[i] = value;
	}
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned int pos;
	unsigned long *page;
	unsigned long val;

	page = (unsigned long *)ptr;
	val = page[0];

	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

#ifdef CONFIG_ZRAM_WRITEBACK
static bool zram_wb_enabled(struct zram *zram)
{
	return zram->backing_dev;
}

static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram_wb_enabled(zram))
		return;

	bdev = zram->bdev;
	if (zram->old_block_size)
		set_blocksize(bdev, zram->old_block_size);
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->old_block_size = 0;
	zram->bdev = NULL;

	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct file *file = zram->backing_dev;
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	if (!zram_wb_enabled(zram)) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}

static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz, old_block_size = 0;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strlcpy(file_name, buf, len);

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Support only block devices for the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = bdgrab(I_BDEV(inode));
	err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (err < 0)
		goto out;

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	old_block_size = block_size(bdev);
	err = set_blocksize(bdev, PAGE_SIZE);
	if (err)
		goto out;

	reset_bdev(zram);
	spin_lock_init(&zram->bitmap_lock);

	zram->old_block_size = old_block_size;
	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	if (bitmap)
		kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}

static unsigned long get_entry_bdev(struct zram *zram)
{
	unsigned long entry;

	spin_lock(&zram->bitmap_lock);
	/* skip bit 0 so a used entry is never confused with zram.handle == 0 */
	entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
	if (entry == zram->nr_pages) {
		spin_unlock(&zram->bitmap_lock);
		return 0;
	}

	set_bit(entry, zram->bitmap);
	spin_unlock(&zram->bitmap_lock);

	return entry;
}

static void put_entry_bdev(struct zram *zram, unsigned long entry)
{
	int was_set;

	spin_lock(&zram->bitmap_lock);
	was_set = test_and_clear_bit(entry, zram->bitmap);
	spin_unlock(&zram->bitmap_lock);
	WARN_ON_ONCE(!was_set);
}

void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio->bi_io_vec[0].bv_page;

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio->bi_bdev = zram->bdev;
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_READ;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	return 1;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
};

#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
	struct bio_vec bvec;
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &bvec, entry, bio);
}

/*
 * The block layer wants only one ->make_request_fn to be active at a time,
 * so chaining this IO to a parent IO issued in the same context would
 * deadlock. To avoid that, the read is done from a worker thread context.
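 * The work item is queued on system_unbound_wq and flushed right away, so
 * the caller still waits for the read to complete; only the bio submission
 * happens in a different execution context.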
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	WARN_ON(1);
	return -EIO;
}
#endif

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	if (sync)
		return read_from_bdev_sync(zram, bvec, entry, parent);
	else
		return read_from_bdev_async(zram, bvec, entry, parent);
}

static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
					u32 index, struct bio *parent,
					unsigned long *pentry)
{
	struct bio *bio;
	unsigned long entry;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	entry = get_entry_bdev(zram);
	if (!entry) {
		bio_put(bio);
		return -ENOSPC;
	}

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio->bi_bdev = zram->bdev;
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
					bvec->bv_offset)) {
		bio_put(bio);
		put_entry_bdev(zram, entry);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	*pentry = entry;

	return 0;
}

static void zram_wb_clear(struct zram *zram, u32 index)
{
	unsigned long entry;

	zram_clear_flag(zram, index, ZRAM_WB);
	entry = zram_get_element(zram, index);
	zram_set_element(zram, index, 0);
	put_entry_bdev(zram, entry);
}

#else
static bool zram_wb_enabled(struct zram *zram) { return false; }
static inline void reset_bdev(struct zram *zram) {}
static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
					u32 index, struct bio *parent,
					unsigned long *pentry)
{
	return -EIO;
}

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}
static void zram_wb_clear(struct zram *zram, u32 index) {}
#endif


/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
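 *
 * Reads report the number of online CPUs and writes are accepted but
 * ignored, so existing scripts that set this attribute keep working.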
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			pool_stats.pages_compacted);
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
}

static bool zram_same_page_read(struct zram *zram, u32 index,
				struct page *page,
				unsigned int offset, unsigned int len)
{
	zram_slot_lock(zram, index);
	if (unlikely(!zram_get_handle(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME))) {
		void *mem;

		zram_slot_unlock(zram, index);
		mem = kmap_atomic(page);
		zram_fill_page(mem + offset, len,
				zram_get_element(zram, index));
		kunmap_atomic(mem);
		return true;
	}
	zram_slot_unlock(zram, index);

	return false;
}

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(num_pages * sizeof(*zram->table));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	return true;
}

/*
 * To protect concurrent access to the same index entry, the caller should
 * hold this table entry's bit_spinlock to indicate that the entry is being
 * accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

	if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
		zram_wb_clear(zram, index);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
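	 * Such entries only store the fill pattern in ->element, so there
	 * is no zsmalloc handle to free; just drop the flag, the element
	 * and the related counters.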
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		zram_set_element(zram, index, 0);
		atomic64_dec(&zram->stats.same_pages);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}

static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
				struct bio *bio, bool partial_io)
{
	int ret;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;

	if (zram_wb_enabled(zram)) {
		zram_slot_lock(zram, index);
		if (zram_test_flag(zram, index, ZRAM_WB)) {
			struct bio_vec bvec;

			zram_slot_unlock(zram, index);

			bvec.bv_page = page;
			bvec.bv_len = PAGE_SIZE;
			bvec.bv_offset = 0;
			return read_from_bdev(zram, &bvec,
					zram_get_element(zram, index),
					bio, partial_io);
		}
		zram_slot_unlock(zram, index);
	}

	if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE))
		return 0;

	zram_slot_lock(zram, index);
	handle = zram_get_handle(zram, index);
	size = zram_get_obj_size(zram, index);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}

static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, struct bio *bio)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = 0;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;
	bool allow_wb = true;

	mem = kmap_atomic(page);
	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/*
		 * Free memory associated with this sector now.
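		 * The actual free happens under the slot lock in the out:
		 * path below, where the old slot contents are released
		 * before the ZRAM_SAME flag and the element are recorded.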
		 */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_atomic(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comp);
	src = kmap_atomic(page);
	ret = zcomp_compress(zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comp);
		pr_err("Compression failed! err=%d\n", ret);
		zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (unlikely(comp_len > max_zpage_size)) {
		if (zram_wb_enabled(zram) && allow_wb) {
			zcomp_stream_put(zram->comp);
			ret = write_to_bdev(zram, bvec, index, bio, &element);
			if (!ret) {
				flags = ZRAM_WB;
				ret = 1;
				goto out;
			}
			allow_wb = false;
			goto compress_again;
		}
		comp_len = PAGE_SIZE;
	}

	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *    per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *    since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *    the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *    put per-cpu compression stream and, thus, to re-do
	 *    the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		if (handle)
			goto compress_again;
		return -ENOMEM;
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comp);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
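		 * The caller's data is then copied into that page at the
		 * given offset and the whole page is compressed and stored
		 * again.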
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = __zram_bvec_read(zram, page, index, bio, true);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);
	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because the logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a specific
	 * offset within a certain physical block. Although we can handle this
	 * request by reading that physical block and decompressing and
	 * partially zeroing and re-compressing and then re-storing it, this
	 * isn't reasonable because our intent with a discard request is to
	 * save memory. So skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

/*
 * Returns errno if it has some problem. Otherwise return 0 or 1.
 * Returns 0 if IO request was done synchronously
 * Returns 1 if IO request was successfully submitted.
 */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, bool is_write, struct bio *bio)
{
	unsigned long start_time = jiffies;
	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
	int ret;

	generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (!is_write) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset, bio);
	}

	generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);

	if (unlikely(ret < 0)) {
		if (!is_write)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					op_is_write(bio_op(bio)), bio) < 0)
				goto out;

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}

static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
	atomic64_inc(&zram->stats.notify_free);
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, bool is_write)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;

	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
out:
	/*
	 * If I/O fails, just return error (i.e. non-zero) without
	 * calling page_endio.
	 * The upper callers of rw_page (e.g. swap_readpage,
	 * __swap_writepage) will then resubmit the I/O as a bio request,
	 * and bio->bi_end_io does the work needed to handle the error
	 * (e.g. SetPageError, set_page_dirty and extra works).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		page_endio(page, is_write, 0);
		break;
	case 1:
		ret = 0;
		break;
	default:
		WARN_ON(1);
	}
	return ret;
}

static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity(zram->disk, 0);
	part_stat_set_all(&zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O operations on all CPUs are done, so it is safe to free */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
	reset_bdev(zram);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	zram_revalidate_disk(zram);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all the pending I/O are finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	zram_revalidate_disk(zram);
	bdput(bdev);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}

static int zram_open(struct block_device *bdev, fmode_t mode)
{
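	/*
	 * The caller holds bd_mutex, so checking ->claim here cannot race
	 * with reset_store()/zram_remove() setting it under the same lock.
	 */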
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
#endif

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

/*
 * Allocate and initialize a new zram device. The function returns
 * '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);

	queue = blk_alloc_queue(GFP_KERNEL);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	blk_queue_make_request(queue, zram_make_request);

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->queue->queuedata = zram;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZE sized I/O requests.
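	 * zram_bvec_rw() still splits any crossing bvec at page boundaries,
	 * but advertising PAGE_SIZE as io_min/io_opt below encourages upper
	 * layers to send page-sized, page-aligned I/O in the first place.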
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if the logical
	 * block size is identical with the physical block size (PAGE_SIZE).
	 * But if it is different, we will skip discarding some parts of
	 * logical blocks in the part of the request range which isn't
	 * aligned to physical block size. So we can't ensure that all
	 * discarded logical blocks are zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_err("Error creating sysfs group for device %d\n",
				device_id);
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

static int zram_remove(struct zram *zram)
{
	struct block_device *bdev;

	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/*
	 * Remove sysfs first, so no one will perform a disksize
	 * store while we destroy the devices. This also helps during
	 * hot_remove -- zram_reset_device() is the last holder of
	 * ->init_lock, no later/concurrent disksize_store() or any
	 * other sysfs handlers are possible.
	 */
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	/* Make sure all the pending I/O are finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	bdput(bdev);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	blk_cleanup_queue(zram->disk->queue);
	del_gendisk(zram->disk);
	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the
 * sense that reading from this file does alter the state of your system -- it
 * creates a new un-initialized zram device and returns this device's
 * device_id (or an error code if it fails to create a new device).
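 *
 * Concurrent readers are serialized by zram_index_mutex, so every read
 * gets its own device_id.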
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static CLASS_ATTR_RO(hot_add);

static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");