/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>
#include <linux/kernel_read_file.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

#define ZRAM_MAX_ALGO_NAME_SZ	128

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;

static void zram_free_page(struct zram *zram, size_t index);
static int zram_read_from_zspool(struct zram *zram, struct page *page,
				 u32 index);

#define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)

static void zram_slot_lock_init(struct zram *zram, u32 index)
{
	static struct lock_class_key __key;

	lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
			 &__key, 0);
}

/*
 * entry locking rules:
 *
 * 1) Lock is exclusive
 *
 * 2) lock() function can sleep waiting for the lock
 *
 * 3) Lock owner can sleep
 *
 * 4) Use TRY lock variant when in atomic context
 *    - must check return value and handle locking failures
 */
static __must_check bool zram_slot_trylock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].flags;

	if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
		mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
		lock_acquired(slot_dep_map(zram, index), _RET_IP_);
		return true;
	}

	return false;
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].flags;

	mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
	wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
	lock_acquired(slot_dep_map(zram, index), _RET_IP_);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].flags;

	mutex_release(slot_dep_map(zram, index), _RET_IP_);
	clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
}
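/*
 * Illustrative sketch (not called anywhere): callers that may run in
 * atomic context are expected to use the TRY variant and handle failure,
 * mirroring what zram_slot_free_notify() does further below:
 *
 *	if (!zram_slot_trylock(zram, index)) {
 *		atomic64_inc(&zram->stats.miss_free);
 *		return;
 *	}
 *	zram_free_page(zram, index);
 *	zram_slot_unlock(zram, index);
 */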
static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

static bool zram_test_flag(struct zram *zram, u32 index,
			   enum zram_pageflags flag)
{
	return zram->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			  enum zram_pageflags flag)
{
	zram->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			    enum zram_pageflags flag)
{
	zram->table[index].flags &= ~BIT(flag);
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
			      u32 index, size_t size)
{
	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
		zram_test_flag(zram, index, ZRAM_SAME) ||
		zram_test_flag(zram, index, ZRAM_WB);
}

static inline void update_used_max(struct zram *zram, const unsigned long pages)
{
	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		if (cur_max >= pages)
			return;
	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
					  &cur_max, pages));
}

static bool zram_can_store_page(struct zram *zram)
{
	unsigned long alloced_pages;

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	return !zram->limit_pages || alloced_pages <= zram->limit_pages;
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#define ZRAM_PARTIAL_IO	1
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Clear previous priority value first, in case we further
	 * recompress an already recompressed page.
	 */
	zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
				      ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}

static inline u32 zram_get_priority(struct zram *zram, u32 index)
{
	u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;

	return prio & ZRAM_COMP_PRIORITY_MASK;
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram_clear_flag(zram, index, ZRAM_PP_SLOT);
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].ac_time = ktime_get_boottime();
#endif
}
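/*
 * The post-processing (pp) machinery below is shared by writeback
 * (CONFIG_ZRAM_WRITEBACK) and recompression (CONFIG_ZRAM_MULTI_COMP).
 * Candidate slots are grouped into buckets by compressed object size;
 * e.g. with a PP_BUCKET_SIZE_RANGE of 64 bytes, a 700-byte object lands
 * in bucket 700 / 64 = 10, so that the largest objects can be processed
 * first (see select_pp_slot()).
 */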
#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
struct zram_pp_slot {
	unsigned long		index;
	struct list_head	entry;
};

/*
 * A post-processing bucket is, essentially, a size class: this defines
 * the range (in bytes) of pp-slot sizes in a particular bucket.
 */
#define PP_BUCKET_SIZE_RANGE	64
#define NUM_PP_BUCKETS		((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)

struct zram_pp_ctl {
	struct list_head	pp_buckets[NUM_PP_BUCKETS];
};

static struct zram_pp_ctl *init_pp_ctl(void)
{
	struct zram_pp_ctl *ctl;
	u32 idx;

	ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
	if (!ctl)
		return NULL;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
		INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
	return ctl;
}

static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
{
	list_del_init(&pps->entry);

	zram_slot_lock(zram, pps->index);
	zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
	zram_slot_unlock(zram, pps->index);

	kfree(pps);
}

static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
{
	u32 idx;

	if (!ctl)
		return;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
		while (!list_empty(&ctl->pp_buckets[idx])) {
			struct zram_pp_slot *pps;

			pps = list_first_entry(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
			release_pp_slot(zram, pps);
		}
	}

	kfree(ctl);
}

static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
			  u32 index)
{
	struct zram_pp_slot *pps;
	u32 bid;

	pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN);
	if (!pps)
		return false;

	INIT_LIST_HEAD(&pps->entry);
	pps->index = index;

	bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
	list_add(&pps->entry, &ctl->pp_buckets[bid]);

	zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
	return true;
}

static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
{
	struct zram_pp_slot *pps = NULL;
	s32 idx = NUM_PP_BUCKETS - 1;

	/* The higher the bucket id the more optimal slot post-processing is */
	while (idx >= 0) {
		pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
		if (pps)
			break;

		idx--;
	}
	return pps;
}
#endif

static inline void zram_fill_page(void *ptr, unsigned long len,
				  unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
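/*
 * Illustrative sysfs usage (assuming a device named zram0): the memory
 * limit below is parsed with memparse(), so suffixed values work, e.g.
 *
 *	echo 256M > /sys/block/zram0/mem_limit
 *	echo 0 > /sys/block/zram0/mem_limit	(0 disables the limit)
 */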
static ssize_t mem_limit_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * Mark all pages which are older than or equal to cutoff as IDLE.
 * Callers should hold the zram init lock in read mode.
 */
static void mark_idle(struct zram *zram, ktime_t cutoff)
{
	int is_idle = 1;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
		 * post-processing (recompress, writeback) happens to the
		 * ZRAM_SAME slot.
		 *
		 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
		    zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME)) {
			zram_slot_unlock(zram, index);
			continue;
		}

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
		is_idle = !cutoff ||
			ktime_after(cutoff, zram->table[index].ac_time);
#endif
		if (is_idle)
			zram_set_flag(zram, index, ZRAM_IDLE);
		else
			zram_clear_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
	}
}
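/*
 * Illustrative sysfs usage (assuming a device named zram0):
 *
 *	echo all > /sys/block/zram0/idle
 *	echo 300 > /sys/block/zram0/idle
 *
 * The second form (an age cutoff in seconds) is only accepted when
 * CONFIG_ZRAM_TRACK_ENTRY_ACTIME is enabled, see idle_store() below.
 */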
static ssize_t idle_store(struct device *dev,
			  struct device_attribute *attr,
			  const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	ktime_t cutoff_time = 0;
	ssize_t rv = -EINVAL;

	if (!sysfs_streq(buf, "all")) {
		/*
		 * If it did not parse as 'all' try to treat it as an integer
		 * when we have memory tracking enabled.
		 */
		u64 age_sec;

		if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) &&
		    !kstrtoull(buf, 0, &age_sec))
			cutoff_time = ktime_sub(ktime_get_boottime(),
					ns_to_ktime(age_sec * NSEC_PER_SEC));
		else
			goto out;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram))
		goto out_unlock;

	/*
	 * A cutoff_time of 0 marks everything as idle; this is the
	 * "all" behavior.
	 */
	mark_idle(zram, cutoff_time);
	rv = len;

out_unlock:
	up_read(&zram->init_lock);
out:
	return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_enable_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t writeback_limit_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}

static void reset_bdev(struct zram *zram)
{
	if (!zram->backing_dev)
		return;

	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}
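/*
 * Illustrative sysfs usage (assuming a device named zram0 and a spare
 * block device partition, here /dev/sdX1, as a hypothetical example):
 *
 *	echo /dev/sdX1 > /sys/block/zram0/backing_dev
 *
 * This must be done before the device is initialized (before disksize
 * is set), see the init_done() check in backing_dev_store() below.
 */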
static ssize_t backing_dev_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strscpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	inode = backing_dev->f_mapping->host;

	/* Support only block devices at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	/* Refuse to use zero sized device (also prevents self reference) */
	if (!nr_pages) {
		err = -EINVAL;
		goto out;
	}

	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = I_BDEV(inode);
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}

static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0 so a block index is never confused with zram.handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}

static void read_from_bdev_async(struct zram *zram, struct page *page,
				 unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_chain(bio, parent);
	submit_bio(bio);
}

#define PAGE_WB_SIG "page_index="

#define PAGE_WRITEBACK			0
#define HUGE_WRITEBACK			(1<<0)
#define IDLE_WRITEBACK			(1<<1)
#define INCOMPRESSIBLE_WRITEBACK	(1<<2)

static int scan_slots_for_writeback(struct zram *zram, u32 mode,
				    unsigned long nr_pages,
				    unsigned long index,
				    struct zram_pp_ctl *ctl)
{
	for (; nr_pages != 0; index++, nr_pages--) {
		bool ok = true;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME))
			goto next;

		if (mode & IDLE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode & HUGE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;
		if (mode & INCOMPRESSIBLE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		ok = place_pp_slot(zram, ctl, index);
next:
		zram_slot_unlock(zram, index);
		if (!ok)
			break;
	}

	return 0;
}
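/*
 * Illustrative sysfs usage (assuming a device named zram0), matching the
 * modes parsed in writeback_store() below:
 *
 *	echo idle > /sys/block/zram0/writeback
 *	echo huge > /sys/block/zram0/writeback
 *	echo huge_idle > /sys/block/zram0/writeback
 *	echo incompressible > /sys/block/zram0/writeback
 *	echo page_index=1251 > /sys/block/zram0/writeback
 *
 * The page_index value above is only an example.
 */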
static ssize_t writeback_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct zram_pp_ctl *ctl = NULL;
	struct zram_pp_slot *pps;
	unsigned long index = 0;
	struct bio bio;
	struct bio_vec bio_vec;
	struct page *page = NULL;
	ssize_t ret = len;
	int mode, err;
	unsigned long blk_idx = 0;

	if (sysfs_streq(buf, "idle"))
		mode = IDLE_WRITEBACK;
	else if (sysfs_streq(buf, "huge"))
		mode = HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "huge_idle"))
		mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "incompressible"))
		mode = INCOMPRESSIBLE_WRITEBACK;
	else {
		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
			return -EINVAL;

		if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
		    index >= nr_pages)
			return -EINVAL;

		nr_pages = 1;
		mode = PAGE_WRITEBACK;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	/* Do not permit concurrent post-processing actions. */
	if (atomic_xchg(&zram->pp_in_progress, 1)) {
		up_read(&zram->init_lock);
		return -EAGAIN;
	}

	if (!zram->backing_dev) {
		ret = -ENODEV;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	ctl = init_pp_ctl();
	if (!ctl) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	scan_slots_for_writeback(zram, mode, nr_pages, index, ctl);

	while ((pps = select_pp_slot(ctl))) {
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			spin_unlock(&zram->wb_limit_lock);
			ret = -EIO;
			break;
		}
		spin_unlock(&zram->wb_limit_lock);

		if (!blk_idx) {
			blk_idx = alloc_block_bdev(zram);
			if (!blk_idx) {
				ret = -ENOSPC;
				break;
			}
		}

		index = pps->index;
		zram_slot_lock(zram, index);
		/*
		 * scan_slots_for_writeback() sets ZRAM_PP_SLOT and releases
		 * the slot lock, so slots can change in the meantime. If
		 * slots are accessed or freed they lose the ZRAM_PP_SLOT
		 * flag and hence we don't post-process them.
		 */
		if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
			goto next;
		if (zram_read_from_zspool(zram, page, index))
			goto next;
		zram_slot_unlock(zram, index);

		bio_init(&bio, zram->bdev, &bio_vec, 1,
			 REQ_OP_WRITE | REQ_SYNC);
		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
		__bio_add_page(&bio, page, PAGE_SIZE, 0);

		/*
		 * XXX: A single page IO would be inefficient for write
		 * but it is not bad as a starter.
		 */
		err = submit_bio_wait(&bio);
		if (err) {
			release_pp_slot(zram, pps);
			/*
			 * BIO errors are not fatal, we continue and simply
			 * attempt to writeback the remaining objects (pages).
			 * At the same time we need to signal user-space that
			 * some writes (at least one, but also could be all of
			 * them) were not successful and we do so by returning
			 * the most recent BIO error.
			 */
			ret = err;
			continue;
		}

		atomic64_inc(&zram->stats.bd_writes);
		zram_slot_lock(zram, index);
		/*
		 * Same as above, we release the slot lock during writeback
		 * so the slot can change under us: slot_free(), or
		 * slot_free() followed by reallocation (zram_write_page()).
		 * In both cases the slot loses the ZRAM_PP_SLOT flag. No
		 * concurrent post-processing can set ZRAM_PP_SLOT on such
		 * slots until the current post-processing finishes.
		 */
		if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
			goto next;

		zram_free_page(zram, index);
		zram_set_flag(zram, index, ZRAM_WB);
		zram_set_handle(zram, index, blk_idx);
		blk_idx = 0;
		atomic64_inc(&zram->stats.pages_stored);
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
			zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
		spin_unlock(&zram->wb_limit_lock);
next:
		zram_slot_unlock(zram, index);
		release_pp_slot(zram, pps);

		cond_resched();
	}

	if (blk_idx)
		free_block_bdev(zram, blk_idx);

release_init_lock:
	if (page)
		__free_page(page);
	release_pp_ctl(zram, ctl);
	atomic_set(&zram->pp_in_progress, 0);
	up_read(&zram->init_lock);

	return ret;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct page *page;
	int error;
};

static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
	__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
	zw->error = submit_bio_wait(&bio);
}

/*
 * The block layer wants one ->submit_bio to be active at a time, so if we
 * use chained IO with the parent IO in the same context, it's a deadlock.
 * To avoid that, use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct page *page,
			       unsigned long entry)
{
	struct zram_work work;

	work.page = page;
	work.zram = zram;
	work.entry = entry;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return work.error;
}

static int read_from_bdev(struct zram *zram, struct page *page,
			  unsigned long entry, struct bio *parent)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (!parent) {
		if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
			return -EIO;
		return read_from_bdev_sync(zram, page, entry);
	}
	read_from_bdev_async(zram, page, entry, parent);
	return 0;
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct page *page,
			  unsigned long entry, struct bio *parent)
{
	return -EIO;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}
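/*
 * block_state (a debugfs file under /sys/kernel/debug/zram/<disk>/) emits
 * one line per allocated slot; illustrative output based on the format
 * string in read_block_state() below:
 *
 *	   300    75.033841 .wh...
 *
 * i.e. index, access time (seconds.microseconds) and the s/w/h/i/r/n
 * flag columns (same-filled, written back, huge, idle, recompressed,
 * incompressible). The sample values are made up.
 */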
static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c%c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
			zram_get_priority(zram, index) ? 'r' : '.',
			zram_test_flag(zram, index,
				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');

		if (count <= copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
					       zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
			    zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif

static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	/* Do not free statically defined compression algorithms */
	if (zram->comp_algs[prio] != default_compressor)
		kfree(zram->comp_algs[prio]);

	zram->comp_algs[prio] = alg;
}

static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
{
	ssize_t sz;

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->comp_algs[prio], buf);
	up_read(&zram->init_lock);

	return sz;
}

static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	char *compressor;
	size_t sz;

	sz = strlen(buf);
	if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
		return -E2BIG;

	compressor = kstrdup(buf, GFP_KERNEL);
	if (!compressor)
		return -ENOMEM;

	/* ignore trailing newline */
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor)) {
		kfree(compressor);
		return -EINVAL;
	}

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		kfree(compressor);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, compressor);
	up_write(&zram->init_lock);
	return 0;
}
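/*
 * Illustrative sysfs usage (assuming a device named zram0 and that the
 * zstd backend is built in): reading comp_algorithm lists the available
 * algorithms, writing one of them selects it, e.g.
 *
 *	cat /sys/block/zram0/comp_algorithm
 *	echo zstd > /sys/block/zram0/comp_algorithm
 *
 * As with the other configuration knobs, this must happen before the
 * device is initialized (see the init_done() check in
 * __comp_algorithm_store() above).
 */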
static void comp_params_reset(struct zram *zram, u32 prio)
{
	struct zcomp_params *params = &zram->params[prio];

	vfree(params->dict);
	params->level = ZCOMP_PARAM_NO_LEVEL;
	params->dict_sz = 0;
	params->dict = NULL;
}

static int comp_params_store(struct zram *zram, u32 prio, s32 level,
			     const char *dict_path)
{
	ssize_t sz = 0;

	comp_params_reset(zram, prio);

	if (dict_path) {
		sz = kernel_read_file_from_path(dict_path, 0,
						&zram->params[prio].dict,
						INT_MAX,
						NULL,
						READING_POLICY);
		if (sz < 0)
			return -EINVAL;
	}

	zram->params[prio].dict_sz = sz;
	zram->params[prio].level = level;
	return 0;
}

static ssize_t algorithm_params_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NO_LEVEL;
	char *args, *param, *val, *algo = NULL, *dict_path = NULL;
	struct zram *zram = dev_to_zram(dev);
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "level")) {
			ret = kstrtoint(val, 10, &level);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "dict")) {
			dict_path = val;
			continue;
		}
	}

	/* Lookup priority by algorithm name */
	if (algo) {
		s32 p;

		prio = -EINVAL;
		for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) {
			if (!zram->comp_algs[p])
				continue;

			if (!strcmp(zram->comp_algs[p], algo)) {
				prio = p;
				break;
			}
		}
	}

	if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = comp_params_store(zram, prio, level, dict_path);
	return ret ? ret : len;
}
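/*
 * Illustrative sysfs usage (assuming a device named zram0, the zstd
 * backend and a dictionary file path that is purely hypothetical):
 *
 *	echo "algo=zstd level=9" > /sys/block/zram0/algorithm_params
 *	echo "priority=1 level=3 dict=/etc/zstd/dict" \
 *		> /sys/block/zram0/algorithm_params
 *
 * Either "algo=" or "priority=" selects which registered algorithm the
 * level/dictionary apply to, as parsed in algorithm_params_store() above.
 */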
static ssize_t comp_algorithm_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
}

static ssize_t comp_algorithm_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf,
				    size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
	return ret ? ret : len;
}

#ifdef CONFIG_ZRAM_MULTI_COMP
static ssize_t recomp_algorithm_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz = 0;
	u32 prio;

	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
		sz += __comp_algorithm_show(zram, prio, buf + sz);
	}

	return sz;
}

static ssize_t recomp_algorithm_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int prio = ZRAM_SECONDARY_COMP;
	char *args, *param, *val;
	char *alg = NULL;
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "algo")) {
			alg = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}
	}

	if (!alg)
		return -EINVAL;

	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = __comp_algorithm_store(zram, prio, alg);
	return ret ? ret : len;
}
#endif

static ssize_t compact_store(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu 0 %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			atomic_long_read(&pool_stats.pages_compacted),
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));
	up_read(&zram->init_lock);

	return ret;
}
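/*
 * For reference, the mm_stat columns above are emitted in this order:
 * pages_stored (in bytes), compr_data_size, mem_used (in bytes),
 * limit_pages (in bytes), max_used (in bytes), same_pages,
 * pages_compacted, huge_pages and huge_pages_since; the first five are
 * byte values, the last four are page counts.
 */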
#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu\n",
			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
	up_read(&zram->init_lock);

	return ret;
}
#endif

static ssize_t debug_stat_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n0 %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.miss_free));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
#endif
static DEVICE_ATTR_RO(debug_stat);

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	if (!zram->table)
		return;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
	zram->table = NULL;
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages, index;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		zram->table = NULL;
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);

	for (index = 0; index < num_pages; index++)
		zram_slot_lock_init(zram, index);

	return true;
}
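/*
 * Release everything associated with the slot at @index: the backing
 * device block for ZRAM_WB slots, the same-page marker for ZRAM_SAME
 * slots, or the zsmalloc object otherwise, and clear the per-slot
 * metadata. The caller is expected to hold the slot lock.
 */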
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].ac_time = 0;
#endif

	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
	zram_clear_flag(zram, index, ZRAM_PP_SLOT);
	zram_set_priority(zram, index, 0);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		free_block_bdev(zram, zram_get_handle(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
		     &zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}

static int read_same_filled_page(struct zram *zram, struct page *page,
				 u32 index)
{
	void *mem;

	mem = kmap_local_page(page);
	zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index));
	kunmap_local(mem);
	return 0;
}

static int read_incompressible_page(struct zram *zram, struct page *page,
				    u32 index)
{
	unsigned long handle;
	void *src, *dst;

	handle = zram_get_handle(zram, index);
	src = zs_obj_read_begin(zram->mem_pool, handle, NULL);
	dst = kmap_local_page(page);
	copy_page(dst, src);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, src);

	return 0;
}

static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	int ret, prio;

	handle = zram_get_handle(zram, index);
	size = zram_get_obj_size(zram, index);
	prio = zram_get_priority(zram, index);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
	dst = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, src);
	zcomp_stream_put(zstrm);

	return ret;
}

/*
 * Reads (decompresses if needed) a page from zspool (zsmalloc).
 * Corresponding ZRAM slot should be locked.
 */
static int zram_read_from_zspool(struct zram *zram, struct page *page,
				 u32 index)
{
	if (zram_test_flag(zram, index, ZRAM_SAME) ||
	    !zram_get_handle(zram, index))
		return read_same_filled_page(zram, page, index);

	if (!zram_test_flag(zram, index, ZRAM_HUGE))
		return read_compressed_page(zram, page, index);
	else
		return read_incompressible_page(zram, page, index);
}

static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	zram_slot_lock(zram, index);
	if (!zram_test_flag(zram, index, ZRAM_WB)) {
		/* Slot should be locked throughout the function call */
		ret = zram_read_from_zspool(zram, page, index);
		zram_slot_unlock(zram, index);
	} else {
		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		zram_slot_unlock(zram, index);

		ret = read_from_bdev(zram, page, zram_get_handle(zram, index),
				     parent);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
/*
 * Use a temporary buffer to decompress the page, as the decompressor
 * always expects a full page for the output.
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;
	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_read_partial(zram, bvec, index, offset);
	return zram_read_page(zram, bvec->bv_page, index, bio);
}

static int write_same_filled_page(struct zram *zram, unsigned long fill,
				  u32 index)
{
	zram_slot_lock(zram, index);
	zram_set_flag(zram, index, ZRAM_SAME);
	zram_set_handle(zram, index, fill);
	zram_slot_unlock(zram, index);

	atomic64_inc(&zram->stats.same_pages);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

static int write_incompressible_page(struct zram *zram, struct page *page,
				     u32 index)
{
	unsigned long handle;
	void *src;

	/*
	 * This function is called from preemptible context, so we don't
	 * need to do optimistic and then fall back to pessimistic handle
	 * allocation, like we do for compressible pages.
	 */
	handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE);
	if (IS_ERR_VALUE(handle))
		return PTR_ERR((void *)handle);

	if (!zram_can_store_page(zram)) {
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	src = kmap_local_page(page);
	zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
	kunmap_local(src);

	zram_slot_lock(zram, index);
	zram_set_flag(zram, index, ZRAM_HUGE);
	zram_set_handle(zram, index, handle);
	zram_set_obj_size(zram, index, PAGE_SIZE);
	zram_slot_unlock(zram, index);

	atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.huge_pages);
	atomic64_inc(&zram->stats.huge_pages_since);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}
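/*
 * Store one page at @index: frees whatever the slot currently holds,
 * short-circuits same-filled pages, compresses with the primary
 * algorithm, falls back to write_incompressible_page() when the result
 * is not smaller than huge_class_size, and otherwise writes the
 * compressed object into the zsmalloc pool and updates the slot
 * metadata and stats.
 */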
static int zram_write_page(struct zram *zram, struct page *page, u32 index)
{
	int ret = 0;
	unsigned long handle;
	unsigned int comp_len;
	void *mem;
	struct zcomp_strm *zstrm;
	unsigned long element;
	bool same_filled;

	/* First, free memory allocated to this slot (if any) */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);

	mem = kmap_local_page(page);
	same_filled = page_same_filled(mem, &element);
	kunmap_local(mem);
	if (same_filled)
		return write_same_filled_page(zram, element, index);

	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	mem = kmap_local_page(page);
	ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
			     mem, &comp_len);
	kunmap_local(mem);

	if (unlikely(ret)) {
		zcomp_stream_put(zstrm);
		pr_err("Compression failed! err=%d\n", ret);
		return ret;
	}

	if (comp_len >= huge_class_size) {
		zcomp_stream_put(zstrm);
		return write_incompressible_page(zram, page, index);
	}

	handle = zs_malloc(zram->mem_pool, comp_len,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE);
	if (IS_ERR_VALUE(handle)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle);
	}

	if (!zram_can_store_page(zram)) {
		zcomp_stream_put(zstrm);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
	zcomp_stream_put(zstrm);

	zram_slot_lock(zram, index);
	zram_set_handle(zram, index, handle);
	zram_set_obj_size(zram, index, comp_len);
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	atomic64_add(comp_len, &zram->stats.compr_data_size);

	return ret;
}

/*
 * This is a partial IO. Read the full page before writing the changes.
 */
static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
				   u32 index, int offset, struct bio *bio)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = zram_read_page(zram, page, index, bio);
	if (!ret) {
		memcpy_from_bvec(page_address(page) + offset, bvec);
		ret = zram_write_page(zram, page, index);
	}
	__free_page(page);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
			   u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_write_partial(zram, bvec, index, offset, bio);
	return zram_write_page(zram, bvec->bv_page, index);
}

#ifdef CONFIG_ZRAM_MULTI_COMP
#define RECOMPRESS_IDLE		(1 << 0)
#define RECOMPRESS_HUGE		(1 << 1)

static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max,
				     struct zram_pp_ctl *ctl)
{
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index;

	for (index = 0; index < nr_pages; index++) {
		bool ok = true;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (mode & RECOMPRESS_IDLE &&
		    !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;

		if (mode & RECOMPRESS_HUGE &&
		    !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME) ||
		    zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		/* Already compressed with same or higher priority */
		if (zram_get_priority(zram, index) + 1 >= prio_max)
			goto next;

		ok = place_pp_slot(zram, ctl, index);
next:
		zram_slot_unlock(zram, index);
		if (!ok)
			break;
	}

	return 0;
}
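/*
 * Illustrative sysfs usage (assuming a device named zram0 and that a
 * secondary algorithm has been registered via recomp_algorithm),
 * matching the parameters parsed in recompress_store() below:
 *
 *	echo "type=huge_idle" > /sys/block/zram0/recompress
 *	echo "type=idle threshold=800 max_pages=1024" \
 *		> /sys/block/zram0/recompress
 *	echo "algo=zstd" > /sys/block/zram0/recompress
 *
 * The threshold and max_pages values are made-up examples.
 */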
/*
 * This function will decompress (unless it's ZRAM_HUGE) the page and then
 * attempt to compress it using provided compression algorithm priority
 * (which is potentially more effective).
 *
 * Corresponding ZRAM slot should be locked.
 */
static int recompress_slot(struct zram *zram, u32 index, struct page *page,
			   u64 *num_recomp_pages, u32 threshold, u32 prio,
			   u32 prio_max)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	void *src;
	int ret = 0;

	handle_old = zram_get_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = zram_get_obj_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = zram_read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	/*
	 * We touched this entry so mark it as non-IDLE. This makes sure that
	 * we don't preserve IDLE flag and don't incorrectly pick this entry
	 * for different post-processing type (e.g. writeback).
	 */
	zram_clear_flag(zram, index, ZRAM_IDLE);

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);

	prio = max(prio, zram_get_priority(zram, index) + 1);
	/*
	 * The recompression slot scan should not select slots that are
	 * already compressed with a higher priority algorithm, but
	 * just in case.
	 */
	if (prio >= prio_max)
		return 0;

	/*
	 * Iterate the secondary comp algorithms list (in order of priority)
	 * and try to recompress the page.
	 */
	for (; prio < prio_max; prio++) {
		if (!zram->comps[prio])
			continue;

		zstrm = zcomp_stream_get(zram->comps[prio]);
		src = kmap_local_page(page);
		ret = zcomp_compress(zram->comps[prio], zstrm,
				     src, &comp_len_new);
		kunmap_local(src);

		if (ret) {
			zcomp_stream_put(zstrm);
			zstrm = NULL;
			break;
		}

		class_index_new = zs_lookup_class_index(zram->mem_pool,
							comp_len_new);

		/* Continue until we make progress */
		if (class_index_new >= class_index_old ||
		    (threshold && comp_len_new >= threshold)) {
			zcomp_stream_put(zstrm);
			zstrm = NULL;
			continue;
		}

		/* Recompression was successful so break out */
		break;
	}

	/*
	 * Decrement the limit (if set) on pages we can recompress, even
	 * when the current recompression was unsuccessful or did not
	 * compress the page below the threshold, because we still spent
	 * resources on it.
	 */
	if (*num_recomp_pages)
		*num_recomp_pages -= 1;

	/* Compression error */
	if (ret)
		return ret;

	if (!zstrm) {
		/*
		 * Secondary algorithms failed to re-compress the page
		 * in a way that would save memory.
		 *
		 * Mark the object incompressible if the max-priority
		 * algorithm couldn't re-compress it.
		 */
		if (prio < zram->num_active_comps)
			return 0;
		zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/*
	 * We are holding the per-CPU stream mutex and the entry lock, so it
	 * is better to avoid direct reclaim. An allocation error is not
	 * fatal since we still have the old object in the mem_pool.
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       GFP_NOIO | __GFP_NOWARN |
			       __GFP_HIGHMEM | __GFP_MOVABLE);
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle_new);
	}

	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zstrm);

	zram_free_page(zram, index);
	zram_set_handle(zram, index, handle_new);
	zram_set_obj_size(zram, index, comp_len_new);
	zram_set_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

static ssize_t recompress_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char *args, *param, *val, *algo = NULL;
	u64 num_recomp_pages = ULLONG_MAX;
	struct zram_pp_ctl *ctl = NULL;
	struct zram_pp_slot *pps;
	u32 mode = 0, threshold = 0;
	u32 prio, prio_max;
	struct page *page = NULL;
	ssize_t ret;

	prio = ZRAM_SECONDARY_COMP;
	prio_max = zram->num_active_comps;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "type")) {
			if (!strcmp(val, "idle"))
				mode = RECOMPRESS_IDLE;
			if (!strcmp(val, "huge"))
				mode = RECOMPRESS_HUGE;
			if (!strcmp(val, "huge_idle"))
				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
			continue;
		}

		if (!strcmp(param, "max_pages")) {
			/*
			 * Limit the number of entries (pages) we attempt to
			 * recompress.
			 */
			ret = kstrtoull(val, 10, &num_recomp_pages);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "threshold")) {
			/*
			 * We will re-compress only idle objects equal to or
			 * greater in size than the watermark.
			 */
			ret = kstrtouint(val, 10, &threshold);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtouint(val, 10, &prio);
			if (ret)
				return ret;

			if (prio == ZRAM_PRIMARY_COMP)
				prio = ZRAM_SECONDARY_COMP;

			prio_max = prio + 1;
			continue;
		}
	}

	if (threshold >= huge_class_size)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	/* Do not permit concurrent post-processing actions. */
	if (atomic_xchg(&zram->pp_in_progress, 1)) {
		up_read(&zram->init_lock);
		return -EAGAIN;
	}

	if (algo) {
		bool found = false;

		for (; prio < ZRAM_MAX_COMPS; prio++) {
			if (!zram->comp_algs[prio])
				continue;

			if (!strcmp(zram->comp_algs[prio], algo)) {
				prio_max = prio + 1;
				found = true;
				break;
			}
		}

		if (!found) {
			ret = -EINVAL;
			goto release_init_lock;
		}
	}

	prio_max = min(prio_max, (u32)zram->num_active_comps);
	if (prio >= prio_max) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	ctl = init_pp_ctl();
	if (!ctl) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	scan_slots_for_recompress(zram, mode, prio_max, ctl);

	ret = len;
	while ((pps = select_pp_slot(ctl))) {
		int err = 0;

		if (!num_recomp_pages)
			break;

		zram_slot_lock(zram, pps->index);
		if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
			goto next;

		err = recompress_slot(zram, pps->index, page,
				      &num_recomp_pages, threshold,
				      prio, prio_max);
next:
		zram_slot_unlock(zram, pps->index);
		release_pp_slot(zram, pps);

		if (err) {
			ret = err;
			break;
		}

		cond_resched();
	}

release_init_lock:
	if (page)
		__free_page(page);
	release_pp_ctl(zram, ctl);
	atomic_set(&zram->pp_in_progress, 0);
	up_read(&zram->init_lock);
	return ret;
}
#endif

static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;

	/*
	 * zram manages data in physical block size units. Because the
	 * logical block size isn't identical to the physical block size on
	 * some architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we can
	 * handle this request by reading that physical block and
	 * decompressing and partially zeroing and re-compressing and then
	 * re-storing it, this isn't reasonable because our intent with a
	 * discard request is to save memory. So skipping this logical block
	 * is appropriate here.
	 */
static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
        size_t n = bio->bi_iter.bi_size;
        u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
        u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                        SECTOR_SHIFT;

        /*
         * zram manages data in physical block size units. Because the
         * logical block size isn't identical to the physical block size on
         * some architectures, we can get a discard request that points to a
         * specific offset within a physical block. Although we could handle
         * such a request by reading that physical block, decompressing it,
         * partially zeroing it, re-compressing it and then re-storing it,
         * that isn't reasonable because our intent with a discard request
         * is to save memory. So skipping this logical block is appropriate
         * here.
         */
        if (offset) {
                if (n <= (PAGE_SIZE - offset))
                        return;

                n -= (PAGE_SIZE - offset);
                index++;
        }

        while (n >= PAGE_SIZE) {
                zram_slot_lock(zram, index);
                zram_free_page(zram, index);
                zram_slot_unlock(zram, index);
                atomic64_inc(&zram->stats.notify_free);
                index++;
                n -= PAGE_SIZE;
        }

        bio_endio(bio);
}

static void zram_bio_read(struct zram *zram, struct bio *bio)
{
        unsigned long start_time = bio_start_io_acct(bio);
        struct bvec_iter iter = bio->bi_iter;

        do {
                u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
                u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                                SECTOR_SHIFT;
                struct bio_vec bv = bio_iter_iovec(bio, iter);

                bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

                if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_reads);
                        bio->bi_status = BLK_STS_IOERR;
                        break;
                }
                flush_dcache_page(bv.bv_page);

                zram_slot_lock(zram, index);
                zram_accessed(zram, index);
                zram_slot_unlock(zram, index);

                bio_advance_iter_single(bio, &iter, bv.bv_len);
        } while (iter.bi_size);

        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
}

static void zram_bio_write(struct zram *zram, struct bio *bio)
{
        unsigned long start_time = bio_start_io_acct(bio);
        struct bvec_iter iter = bio->bi_iter;

        do {
                u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
                u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                                SECTOR_SHIFT;
                struct bio_vec bv = bio_iter_iovec(bio, iter);

                bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

                if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_writes);
                        bio->bi_status = BLK_STS_IOERR;
                        break;
                }

                zram_slot_lock(zram, index);
                zram_accessed(zram, index);
                zram_slot_unlock(zram, index);

                bio_advance_iter_single(bio, &iter, bv.bv_len);
        } while (iter.bi_size);

        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static void zram_submit_bio(struct bio *bio)
{
        struct zram *zram = bio->bi_bdev->bd_disk->private_data;

        switch (bio_op(bio)) {
        case REQ_OP_READ:
                zram_bio_read(zram, bio);
                break;
        case REQ_OP_WRITE:
                zram_bio_write(zram, bio);
                break;
        case REQ_OP_DISCARD:
        case REQ_OP_WRITE_ZEROES:
                zram_bio_discard(zram, bio);
                break;
        default:
                WARN_ON_ONCE(1);
                bio_endio(bio);
        }
}
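
/*
 * ->swap_slot_free_notify hook. It may be invoked in contexts where
 * sleeping on the slot lock is not acceptable, hence the trylock: if the
 * slot is busy, the free is only accounted as miss_free and the stale
 * entry is reclaimed later (e.g. when the slot is overwritten or the
 * device is reset).
 */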
static void zram_slot_free_notify(struct block_device *bdev,
                                  unsigned long index)
{
        struct zram *zram;

        zram = bdev->bd_disk->private_data;

        atomic64_inc(&zram->stats.notify_free);
        if (!zram_slot_trylock(zram, index)) {
                atomic64_inc(&zram->stats.miss_free);
                return;
        }

        zram_free_page(zram, index);
        zram_slot_unlock(zram, index);
}

static void zram_comp_params_reset(struct zram *zram)
{
        u32 prio;

        for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
                comp_params_reset(zram, prio);
        }
}

static void zram_destroy_comps(struct zram *zram)
{
        u32 prio;

        for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
                struct zcomp *comp = zram->comps[prio];

                zram->comps[prio] = NULL;
                if (!comp)
                        continue;
                zcomp_destroy(comp);
                zram->num_active_comps--;
        }

        for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
                /* Do not free statically defined compression algorithms */
                if (zram->comp_algs[prio] != default_compressor)
                        kfree(zram->comp_algs[prio]);
                zram->comp_algs[prio] = NULL;
        }

        zram_comp_params_reset(zram);
}

static void zram_reset_device(struct zram *zram)
{
        down_write(&zram->init_lock);

        zram->limit_pages = 0;

        set_capacity_and_notify(zram->disk, 0);
        part_stat_set_all(zram->disk->part0, 0);

        /*
         * All in-flight I/O on all CPUs has completed, so it is safe to
         * free the metadata.
         */
        zram_meta_free(zram, zram->disksize);
        zram->disksize = 0;
        zram_destroy_comps(zram);
        memset(&zram->stats, 0, sizeof(zram->stats));
        atomic_set(&zram->pp_in_progress, 0);
        reset_bdev(zram);

        comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
        up_write(&zram->init_lock);
}
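
/*
 * Typical provisioning sequence (illustrative; the device name and the
 * algorithm are examples). The size is parsed with memparse(), so suffixes
 * such as K, M and G are accepted, and a non-default compression algorithm,
 * if desired, must be selected before the disksize is set:
 *
 *      echo zstd > /sys/block/zram0/comp_algorithm     # optional
 *      echo 1G > /sys/block/zram0/disksize
 *      mkswap /dev/zram0 && swapon /dev/zram0          # or mkfs + mount
 *
 * Once initialized, the size cannot be changed until the device is reset.
 */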
static ssize_t disksize_store(struct device *dev,
                              struct device_attribute *attr,
                              const char *buf, size_t len)
{
        u64 disksize;
        struct zcomp *comp;
        struct zram *zram = dev_to_zram(dev);
        int err;
        u32 prio;

        disksize = memparse(buf, NULL);
        if (!disksize)
                return -EINVAL;

        down_write(&zram->init_lock);
        if (init_done(zram)) {
                pr_info("Cannot change disksize for initialized device\n");
                err = -EBUSY;
                goto out_unlock;
        }

        disksize = PAGE_ALIGN(disksize);
        if (!zram_meta_alloc(zram, disksize)) {
                err = -ENOMEM;
                goto out_unlock;
        }

        for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
                if (!zram->comp_algs[prio])
                        continue;

                comp = zcomp_create(zram->comp_algs[prio],
                                    &zram->params[prio]);
                if (IS_ERR(comp)) {
                        pr_err("Cannot initialise %s compressing backend\n",
                               zram->comp_algs[prio]);
                        err = PTR_ERR(comp);
                        goto out_free_comps;
                }

                zram->comps[prio] = comp;
                zram->num_active_comps++;
        }
        zram->disksize = disksize;
        set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
        up_write(&zram->init_lock);

        return len;

out_free_comps:
        zram_destroy_comps(zram);
        zram_meta_free(zram, disksize);
out_unlock:
        up_write(&zram->init_lock);
        return err;
}
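
/*
 * Resetting returns the device to its initial, unconfigured state and
 * releases all memory it holds. The device must not be in use when it is
 * reset, e.g. (illustrative):
 *
 *      swapoff /dev/zram0              # or umount the filesystem on it
 *      echo 1 > /sys/block/zram0/reset
 */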
static ssize_t reset_store(struct device *dev,
                           struct device_attribute *attr,
                           const char *buf, size_t len)
{
        int ret;
        unsigned short do_reset;
        struct zram *zram;
        struct gendisk *disk;

        ret = kstrtou16(buf, 10, &do_reset);
        if (ret)
                return ret;

        if (!do_reset)
                return -EINVAL;

        zram = dev_to_zram(dev);
        disk = zram->disk;

        mutex_lock(&disk->open_mutex);
        /* Do not reset an active device or claimed device */
        if (disk_openers(disk) || zram->claim) {
                mutex_unlock(&disk->open_mutex);
                return -EBUSY;
        }

        /* From now on, no one can open /dev/zram[0-9] */
        zram->claim = true;
        mutex_unlock(&disk->open_mutex);

        /* Make sure all pending I/O is finished */
        sync_blockdev(disk->part0);
        zram_reset_device(zram);

        mutex_lock(&disk->open_mutex);
        zram->claim = false;
        mutex_unlock(&disk->open_mutex);

        return len;
}

static int zram_open(struct gendisk *disk, blk_mode_t mode)
{
        struct zram *zram = disk->private_data;

        WARN_ON(!mutex_is_locked(&disk->open_mutex));

        /* zram was claimed for reset, so the open request fails */
        if (zram->claim)
                return -EBUSY;
        return 0;
}

static const struct block_device_operations zram_devops = {
        .open = zram_open,
        .submit_bio = zram_submit_bio,
        .swap_slot_free_notify = zram_slot_free_notify,
        .owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
static DEVICE_ATTR_WO(recompress);
#endif
static DEVICE_ATTR_WO(algorithm_params);

static struct attribute *zram_disk_attrs[] = {
        &dev_attr_disksize.attr,
        &dev_attr_initstate.attr,
        &dev_attr_reset.attr,
        &dev_attr_compact.attr,
        &dev_attr_mem_limit.attr,
        &dev_attr_mem_used_max.attr,
        &dev_attr_idle.attr,
        &dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
        &dev_attr_backing_dev.attr,
        &dev_attr_writeback.attr,
        &dev_attr_writeback_limit.attr,
        &dev_attr_writeback_limit_enable.attr,
#endif
        &dev_attr_io_stat.attr,
        &dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
        &dev_attr_bd_stat.attr,
#endif
        &dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
        &dev_attr_recomp_algorithm.attr,
        &dev_attr_recompress.attr,
#endif
        &dev_attr_algorithm_params.attr,
        NULL,
};

ATTRIBUTE_GROUPS(zram_disk);
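
/*
 * The attribute groups above are attached in zram_add() via
 * device_add_disk(), so the per-device knobs appear under
 * /sys/block/zram<id>/ (e.g. /sys/block/zram0/mm_stat). Which of them
 * exist depends on the writeback and multi-compression config options,
 * matching the #ifdefs above.
 */
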
/*
 * Allocate and initialize a new zram device. The function returns a
 * device_id ('>= 0') upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
        struct queue_limits lim = {
                .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
                /*
                 * To ensure that we always get PAGE_SIZE-aligned and
                 * n*PAGE_SIZE-sized I/O requests.
                 */
                .physical_block_size = PAGE_SIZE,
                .io_min = PAGE_SIZE,
                .io_opt = PAGE_SIZE,
                .max_hw_discard_sectors = UINT_MAX,
                /*
                 * zram_bio_discard() will clear all logical blocks if the
                 * logical block size is identical to the physical block
                 * size (PAGE_SIZE). But if they differ, we skip discarding
                 * the parts of the request range that aren't aligned to the
                 * physical block size, so we can't guarantee that all
                 * discarded logical blocks are zeroed.
                 */
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
                .max_write_zeroes_sectors = UINT_MAX,
#endif
                .features = BLK_FEAT_STABLE_WRITES |
                            BLK_FEAT_SYNCHRONOUS,
        };
        struct zram *zram;
        int ret, device_id;

        zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
        if (!zram)
                return -ENOMEM;

        ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
        if (ret < 0)
                goto out_free_dev;
        device_id = ret;

        init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
        spin_lock_init(&zram->wb_limit_lock);
#endif

        /* gendisk structure */
        zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
        if (IS_ERR(zram->disk)) {
                pr_err("Error allocating disk structure for device %d\n",
                       device_id);
                ret = PTR_ERR(zram->disk);
                goto out_free_idr;
        }

        zram->disk->major = zram_major;
        zram->disk->first_minor = device_id;
        zram->disk->minors = 1;
        zram->disk->flags |= GENHD_FL_NO_PART;
        zram->disk->fops = &zram_devops;
        zram->disk->private_data = zram;
        snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
        atomic_set(&zram->pp_in_progress, 0);
        zram_comp_params_reset(zram);
        comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);

        /* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
        set_capacity(zram->disk, 0);
        ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
        if (ret)
                goto out_cleanup_disk;

        zram_debugfs_register(zram);
        pr_info("Added device: %s\n", zram->disk->disk_name);
        return device_id;

out_cleanup_disk:
        put_disk(zram->disk);
out_free_idr:
        idr_remove(&zram_index_idr, device_id);
out_free_dev:
        kfree(zram);
        return ret;
}

static int zram_remove(struct zram *zram)
{
        bool claimed;

        mutex_lock(&zram->disk->open_mutex);
        if (disk_openers(zram->disk)) {
                mutex_unlock(&zram->disk->open_mutex);
                return -EBUSY;
        }

        claimed = zram->claim;
        if (!claimed)
                zram->claim = true;
        mutex_unlock(&zram->disk->open_mutex);

        zram_debugfs_unregister(zram);

        if (claimed) {
                /*
                 * If we were claimed by reset_store(), del_gendisk() will
                 * wait until reset_store() is done, so there is nothing
                 * to do.
                 */
                ;
        } else {
                /* Make sure all pending I/O is finished */
                sync_blockdev(zram->disk->part0);
                zram_reset_device(zram);
        }

        pr_info("Removed device: %s\n", zram->disk->disk_name);

        del_gendisk(zram->disk);

        /* del_gendisk drains pending reset_store */
        WARN_ON_ONCE(claimed && zram->claim);

        /*
         * disksize_store() may be called in between zram_reset_device()
         * and del_gendisk(), so run the last reset to avoid leaking
         * anything allocated with disksize_store().
         */
        zram_reset_device(zram);

        put_disk(zram->disk);
        kfree(zram);
        return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add is not the usual read-only sysfs attribute, in the sense
 * that reading from this file does alter the state of your system -- it
 * creates a new un-initialized zram device and returns that device's
 * device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(const struct class *class,
                            const struct class_attribute *attr,
                            char *buf)
{
        int ret;

        mutex_lock(&zram_index_mutex);
        ret = zram_add();
        mutex_unlock(&zram_index_mutex);

        if (ret < 0)
                return ret;
        return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
static struct class_attribute class_attr_hot_add =
        __ATTR(hot_add, 0400, hot_add_show, NULL);

static ssize_t hot_remove_store(const struct class *class,
                                const struct class_attribute *attr,
                                const char *buf,
                                size_t count)
{
        struct zram *zram;
        int ret, dev_id;

        /* dev_id is gendisk->first_minor, which is `int' */
        ret = kstrtoint(buf, 10, &dev_id);
        if (ret)
                return ret;
        if (dev_id < 0)
                return -EINVAL;

        mutex_lock(&zram_index_mutex);

        zram = idr_find(&zram_index_idr, dev_id);
        if (zram) {
                ret = zram_remove(zram);
                if (!ret)
                        idr_remove(&zram_index_idr, dev_id);
        } else {
                ret = -ENODEV;
        }

        mutex_unlock(&zram_index_mutex);
        return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);
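
/*
 * Example use of the zram-control class attributes defined above
 * (illustrative):
 *
 *      cat /sys/class/zram-control/hot_add     # prints the new device id
 *      echo 4 > /sys/class/zram-control/hot_remove
 *
 * hot_remove fails with -EBUSY while the target device is still open
 * (mounted, in use as swap, and so on).
 */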
static struct attribute *zram_control_class_attrs[] = {
        &class_attr_hot_add.attr,
        &class_attr_hot_remove.attr,
        NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
        .name = "zram-control",
        .class_groups = zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
        WARN_ON_ONCE(zram_remove(ptr));
        return 0;
}

static void destroy_devices(void)
{
        class_unregister(&zram_control_class);
        idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
        zram_debugfs_destroy();
        idr_destroy(&zram_index_idr);
        unregister_blkdev(zram_major, "zram");
        cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
        struct zram_table_entry zram_te;
        int ret;

        BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);

        ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
                                      zcomp_cpu_up_prepare, zcomp_cpu_dead);
        if (ret < 0)
                return ret;

        ret = class_register(&zram_control_class);
        if (ret) {
                pr_err("Unable to register zram-control class\n");
                cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
                return ret;
        }

        zram_debugfs_create();
        zram_major = register_blkdev(0, "zram");
        if (zram_major <= 0) {
                pr_err("Unable to get major number\n");
                class_unregister(&zram_control_class);
                cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
                return -EBUSY;
        }

        while (num_devices != 0) {
                mutex_lock(&zram_index_mutex);
                ret = zram_add();
                mutex_unlock(&zram_index_mutex);
                if (ret < 0)
                        goto out_error;
                num_devices--;
        }

        return 0;

out_error:
        destroy_devices();
        return ret;
}

static void __exit zram_exit(void)
{
        destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
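
/*
 * Example module load (illustrative): pre-create two devices at init time;
 * additional devices can be added later through
 * /sys/class/zram-control/hot_add.
 *
 *      modprobe zram num_devices=2
 */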