1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define KMSG_COMPONENT "zram" 16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 17 18 #include <linux/module.h> 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/bitops.h> 22 #include <linux/blkdev.h> 23 #include <linux/buffer_head.h> 24 #include <linux/device.h> 25 #include <linux/highmem.h> 26 #include <linux/slab.h> 27 #include <linux/backing-dev.h> 28 #include <linux/string.h> 29 #include <linux/vmalloc.h> 30 #include <linux/err.h> 31 #include <linux/idr.h> 32 #include <linux/sysfs.h> 33 #include <linux/debugfs.h> 34 #include <linux/cpuhotplug.h> 35 #include <linux/part_stat.h> 36 #include <linux/kernel_read_file.h> 37 38 #include "zram_drv.h" 39 40 static DEFINE_IDR(zram_index_idr); 41 /* idr index must be protected */ 42 static DEFINE_MUTEX(zram_index_mutex); 43 44 static int zram_major; 45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; 46 47 #define ZRAM_MAX_ALGO_NAME_SZ 128 48 49 /* Module params (documentation at end) */ 50 static unsigned int num_devices = 1; 51 /* 52 * Pages that compress to sizes equals or greater than this are stored 53 * uncompressed in memory. 
54 */ 55 static size_t huge_class_size; 56 57 static const struct block_device_operations zram_devops; 58 59 static void zram_free_page(struct zram *zram, size_t index); 60 static int zram_read_from_zspool(struct zram *zram, struct page *page, 61 u32 index); 62 63 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map) 64 65 static void zram_slot_lock_init(struct zram *zram, u32 index) 66 { 67 static struct lock_class_key __key; 68 69 lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock", 70 &__key, 0); 71 } 72 73 /* 74 * entry locking rules: 75 * 76 * 1) Lock is exclusive 77 * 78 * 2) lock() function can sleep waiting for the lock 79 * 80 * 3) Lock owner can sleep 81 * 82 * 4) Use TRY lock variant when in atomic context 83 * - must check return value and handle locking failers 84 */ 85 static __must_check bool zram_slot_trylock(struct zram *zram, u32 index) 86 { 87 unsigned long *lock = &zram->table[index].flags; 88 89 if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) { 90 mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_); 91 lock_acquired(slot_dep_map(zram, index), _RET_IP_); 92 return true; 93 } 94 95 return false; 96 } 97 98 static void zram_slot_lock(struct zram *zram, u32 index) 99 { 100 unsigned long *lock = &zram->table[index].flags; 101 102 mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_); 103 wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE); 104 lock_acquired(slot_dep_map(zram, index), _RET_IP_); 105 } 106 107 static void zram_slot_unlock(struct zram *zram, u32 index) 108 { 109 unsigned long *lock = &zram->table[index].flags; 110 111 mutex_release(slot_dep_map(zram, index), _RET_IP_); 112 clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock); 113 } 114 115 static inline bool init_done(struct zram *zram) 116 { 117 return zram->disksize; 118 } 119 120 static inline struct zram *dev_to_zram(struct device *dev) 121 { 122 return (struct zram *)dev_to_disk(dev)->private_data; 123 } 124 125 static unsigned long 
zram_get_handle(struct zram *zram, u32 index) 126 { 127 return zram->table[index].handle; 128 } 129 130 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) 131 { 132 zram->table[index].handle = handle; 133 } 134 135 static bool zram_test_flag(struct zram *zram, u32 index, 136 enum zram_pageflags flag) 137 { 138 return zram->table[index].flags & BIT(flag); 139 } 140 141 static void zram_set_flag(struct zram *zram, u32 index, 142 enum zram_pageflags flag) 143 { 144 zram->table[index].flags |= BIT(flag); 145 } 146 147 static void zram_clear_flag(struct zram *zram, u32 index, 148 enum zram_pageflags flag) 149 { 150 zram->table[index].flags &= ~BIT(flag); 151 } 152 153 static size_t zram_get_obj_size(struct zram *zram, u32 index) 154 { 155 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); 156 } 157 158 static void zram_set_obj_size(struct zram *zram, 159 u32 index, size_t size) 160 { 161 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; 162 163 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; 164 } 165 166 static inline bool zram_allocated(struct zram *zram, u32 index) 167 { 168 return zram_get_obj_size(zram, index) || 169 zram_test_flag(zram, index, ZRAM_SAME) || 170 zram_test_flag(zram, index, ZRAM_WB); 171 } 172 173 static inline void update_used_max(struct zram *zram, const unsigned long pages) 174 { 175 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); 176 177 do { 178 if (cur_max >= pages) 179 return; 180 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, 181 &cur_max, pages)); 182 } 183 184 static bool zram_can_store_page(struct zram *zram) 185 { 186 unsigned long alloced_pages; 187 188 alloced_pages = zs_get_total_pages(zram->mem_pool); 189 update_used_max(zram, alloced_pages); 190 191 return !zram->limit_pages || alloced_pages <= zram->limit_pages; 192 } 193 194 #if PAGE_SIZE != 4096 195 static inline bool is_partial_io(struct bio_vec *bvec) 196 { 197 return 
bvec->bv_len != PAGE_SIZE; 198 } 199 #define ZRAM_PARTIAL_IO 1 200 #else 201 static inline bool is_partial_io(struct bio_vec *bvec) 202 { 203 return false; 204 } 205 #endif 206 207 static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio) 208 { 209 prio &= ZRAM_COMP_PRIORITY_MASK; 210 /* 211 * Clear previous priority value first, in case if we recompress 212 * further an already recompressed page 213 */ 214 zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK << 215 ZRAM_COMP_PRIORITY_BIT1); 216 zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); 217 } 218 219 static inline u32 zram_get_priority(struct zram *zram, u32 index) 220 { 221 u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1; 222 223 return prio & ZRAM_COMP_PRIORITY_MASK; 224 } 225 226 static void zram_accessed(struct zram *zram, u32 index) 227 { 228 zram_clear_flag(zram, index, ZRAM_IDLE); 229 zram_clear_flag(zram, index, ZRAM_PP_SLOT); 230 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 231 zram->table[index].ac_time = ktime_get_boottime(); 232 #endif 233 } 234 235 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP 236 struct zram_pp_slot { 237 unsigned long index; 238 struct list_head entry; 239 }; 240 241 /* 242 * A post-processing bucket is, essentially, a size class, this defines 243 * the range (in bytes) of pp-slots sizes in particular bucket. 
244 */ 245 #define PP_BUCKET_SIZE_RANGE 64 246 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1) 247 248 struct zram_pp_ctl { 249 struct list_head pp_buckets[NUM_PP_BUCKETS]; 250 }; 251 252 static struct zram_pp_ctl *init_pp_ctl(void) 253 { 254 struct zram_pp_ctl *ctl; 255 u32 idx; 256 257 ctl = kmalloc(sizeof(*ctl), GFP_KERNEL); 258 if (!ctl) 259 return NULL; 260 261 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) 262 INIT_LIST_HEAD(&ctl->pp_buckets[idx]); 263 return ctl; 264 } 265 266 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps) 267 { 268 list_del_init(&pps->entry); 269 270 zram_slot_lock(zram, pps->index); 271 zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT); 272 zram_slot_unlock(zram, pps->index); 273 274 kfree(pps); 275 } 276 277 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl) 278 { 279 u32 idx; 280 281 if (!ctl) 282 return; 283 284 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) { 285 while (!list_empty(&ctl->pp_buckets[idx])) { 286 struct zram_pp_slot *pps; 287 288 pps = list_first_entry(&ctl->pp_buckets[idx], 289 struct zram_pp_slot, 290 entry); 291 release_pp_slot(zram, pps); 292 } 293 } 294 295 kfree(ctl); 296 } 297 298 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl, 299 u32 index) 300 { 301 struct zram_pp_slot *pps; 302 u32 bid; 303 304 pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN); 305 if (!pps) 306 return false; 307 308 INIT_LIST_HEAD(&pps->entry); 309 pps->index = index; 310 311 bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; 312 list_add(&pps->entry, &ctl->pp_buckets[bid]); 313 314 zram_set_flag(zram, pps->index, ZRAM_PP_SLOT); 315 return true; 316 } 317 318 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl) 319 { 320 struct zram_pp_slot *pps = NULL; 321 s32 idx = NUM_PP_BUCKETS - 1; 322 323 /* The higher the bucket id the more optimal slot post-processing is */ 324 while (idx >= 0) { 325 pps = 
list_first_entry_or_null(&ctl->pp_buckets[idx], 326 struct zram_pp_slot, 327 entry); 328 if (pps) 329 break; 330 331 idx--; 332 } 333 return pps; 334 } 335 #endif 336 337 static inline void zram_fill_page(void *ptr, unsigned long len, 338 unsigned long value) 339 { 340 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 341 memset_l(ptr, value, len / sizeof(unsigned long)); 342 } 343 344 static bool page_same_filled(void *ptr, unsigned long *element) 345 { 346 unsigned long *page; 347 unsigned long val; 348 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 349 350 page = (unsigned long *)ptr; 351 val = page[0]; 352 353 if (val != page[last_pos]) 354 return false; 355 356 for (pos = 1; pos < last_pos; pos++) { 357 if (val != page[pos]) 358 return false; 359 } 360 361 *element = val; 362 363 return true; 364 } 365 366 static ssize_t initstate_show(struct device *dev, 367 struct device_attribute *attr, char *buf) 368 { 369 u32 val; 370 struct zram *zram = dev_to_zram(dev); 371 372 down_read(&zram->init_lock); 373 val = init_done(zram); 374 up_read(&zram->init_lock); 375 376 return sysfs_emit(buf, "%u\n", val); 377 } 378 379 static ssize_t disksize_show(struct device *dev, 380 struct device_attribute *attr, char *buf) 381 { 382 struct zram *zram = dev_to_zram(dev); 383 384 return sysfs_emit(buf, "%llu\n", zram->disksize); 385 } 386 387 static ssize_t mem_limit_store(struct device *dev, 388 struct device_attribute *attr, const char *buf, size_t len) 389 { 390 u64 limit; 391 char *tmp; 392 struct zram *zram = dev_to_zram(dev); 393 394 limit = memparse(buf, &tmp); 395 if (buf == tmp) /* no chars parsed, invalid input */ 396 return -EINVAL; 397 398 down_write(&zram->init_lock); 399 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 400 up_write(&zram->init_lock); 401 402 return len; 403 } 404 405 static ssize_t mem_used_max_store(struct device *dev, 406 struct device_attribute *attr, const char *buf, size_t len) 407 { 408 int err; 409 unsigned long val; 410 
struct zram *zram = dev_to_zram(dev); 411 412 err = kstrtoul(buf, 10, &val); 413 if (err || val != 0) 414 return -EINVAL; 415 416 down_read(&zram->init_lock); 417 if (init_done(zram)) { 418 atomic_long_set(&zram->stats.max_used_pages, 419 zs_get_total_pages(zram->mem_pool)); 420 } 421 up_read(&zram->init_lock); 422 423 return len; 424 } 425 426 /* 427 * Mark all pages which are older than or equal to cutoff as IDLE. 428 * Callers should hold the zram init lock in read mode 429 */ 430 static void mark_idle(struct zram *zram, ktime_t cutoff) 431 { 432 int is_idle = 1; 433 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 434 int index; 435 436 for (index = 0; index < nr_pages; index++) { 437 /* 438 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no 439 * post-processing (recompress, writeback) happens to the 440 * ZRAM_SAME slot. 441 * 442 * And ZRAM_WB slots simply cannot be ZRAM_IDLE. 443 */ 444 zram_slot_lock(zram, index); 445 if (!zram_allocated(zram, index) || 446 zram_test_flag(zram, index, ZRAM_WB) || 447 zram_test_flag(zram, index, ZRAM_SAME)) { 448 zram_slot_unlock(zram, index); 449 continue; 450 } 451 452 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 453 is_idle = !cutoff || 454 ktime_after(cutoff, zram->table[index].ac_time); 455 #endif 456 if (is_idle) 457 zram_set_flag(zram, index, ZRAM_IDLE); 458 else 459 zram_clear_flag(zram, index, ZRAM_IDLE); 460 zram_slot_unlock(zram, index); 461 } 462 } 463 464 static ssize_t idle_store(struct device *dev, 465 struct device_attribute *attr, const char *buf, size_t len) 466 { 467 struct zram *zram = dev_to_zram(dev); 468 ktime_t cutoff_time = 0; 469 ssize_t rv = -EINVAL; 470 471 if (!sysfs_streq(buf, "all")) { 472 /* 473 * If it did not parse as 'all' try to treat it as an integer 474 * when we have memory tracking enabled. 
475 */ 476 u64 age_sec; 477 478 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec)) 479 cutoff_time = ktime_sub(ktime_get_boottime(), 480 ns_to_ktime(age_sec * NSEC_PER_SEC)); 481 else 482 goto out; 483 } 484 485 down_read(&zram->init_lock); 486 if (!init_done(zram)) 487 goto out_unlock; 488 489 /* 490 * A cutoff_time of 0 marks everything as idle, this is the 491 * "all" behavior. 492 */ 493 mark_idle(zram, cutoff_time); 494 rv = len; 495 496 out_unlock: 497 up_read(&zram->init_lock); 498 out: 499 return rv; 500 } 501 502 #ifdef CONFIG_ZRAM_WRITEBACK 503 #define INVALID_BDEV_BLOCK (~0UL) 504 505 struct zram_wb_ctl { 506 /* idle list is accessed only by the writeback task, no concurency */ 507 struct list_head idle_reqs; 508 /* done list is accessed concurrently, protect by done_lock */ 509 struct list_head done_reqs; 510 wait_queue_head_t done_wait; 511 spinlock_t done_lock; 512 atomic_t num_inflight; 513 }; 514 515 struct zram_wb_req { 516 unsigned long blk_idx; 517 struct page *page; 518 struct zram_pp_slot *pps; 519 struct bio_vec bio_vec; 520 struct bio bio; 521 522 struct list_head entry; 523 }; 524 525 static ssize_t writeback_limit_enable_store(struct device *dev, 526 struct device_attribute *attr, 527 const char *buf, size_t len) 528 { 529 struct zram *zram = dev_to_zram(dev); 530 u64 val; 531 ssize_t ret = -EINVAL; 532 533 if (kstrtoull(buf, 10, &val)) 534 return ret; 535 536 down_write(&zram->init_lock); 537 zram->wb_limit_enable = val; 538 up_write(&zram->init_lock); 539 ret = len; 540 541 return ret; 542 } 543 544 static ssize_t writeback_limit_enable_show(struct device *dev, 545 struct device_attribute *attr, 546 char *buf) 547 { 548 bool val; 549 struct zram *zram = dev_to_zram(dev); 550 551 down_read(&zram->init_lock); 552 val = zram->wb_limit_enable; 553 up_read(&zram->init_lock); 554 555 return sysfs_emit(buf, "%d\n", val); 556 } 557 558 static ssize_t writeback_limit_store(struct device *dev, 559 struct 
device_attribute *attr, 560 const char *buf, size_t len) 561 { 562 struct zram *zram = dev_to_zram(dev); 563 u64 val; 564 ssize_t ret = -EINVAL; 565 566 if (kstrtoull(buf, 10, &val)) 567 return ret; 568 569 /* 570 * When the page size is greater than 4KB, if bd_wb_limit is set to 571 * a value that is not page - size aligned, it will cause value 572 * wrapping. For example, when the page size is set to 16KB and 573 * bd_wb_limit is set to 3, a single write - back operation will 574 * cause bd_wb_limit to become -1. Even more terrifying is that 575 * bd_wb_limit is an unsigned number. 576 */ 577 val = rounddown(val, PAGE_SIZE / 4096); 578 579 down_write(&zram->init_lock); 580 zram->bd_wb_limit = val; 581 up_write(&zram->init_lock); 582 ret = len; 583 584 return ret; 585 } 586 587 static ssize_t writeback_limit_show(struct device *dev, 588 struct device_attribute *attr, char *buf) 589 { 590 u64 val; 591 struct zram *zram = dev_to_zram(dev); 592 593 down_read(&zram->init_lock); 594 val = zram->bd_wb_limit; 595 up_read(&zram->init_lock); 596 597 return sysfs_emit(buf, "%llu\n", val); 598 } 599 600 static ssize_t writeback_batch_size_store(struct device *dev, 601 struct device_attribute *attr, 602 const char *buf, size_t len) 603 { 604 struct zram *zram = dev_to_zram(dev); 605 u32 val; 606 607 if (kstrtouint(buf, 10, &val)) 608 return -EINVAL; 609 610 if (!val) 611 return -EINVAL; 612 613 down_write(&zram->init_lock); 614 zram->wb_batch_size = val; 615 up_write(&zram->init_lock); 616 617 return len; 618 } 619 620 static ssize_t writeback_batch_size_show(struct device *dev, 621 struct device_attribute *attr, 622 char *buf) 623 { 624 u32 val; 625 struct zram *zram = dev_to_zram(dev); 626 627 down_read(&zram->init_lock); 628 val = zram->wb_batch_size; 629 up_read(&zram->init_lock); 630 631 return sysfs_emit(buf, "%u\n", val); 632 } 633 634 static void reset_bdev(struct zram *zram) 635 { 636 if (!zram->backing_dev) 637 return; 638 639 /* hope filp_close flush all of IO */ 
640 filp_close(zram->backing_dev, NULL); 641 zram->backing_dev = NULL; 642 zram->bdev = NULL; 643 zram->disk->fops = &zram_devops; 644 kvfree(zram->bitmap); 645 zram->bitmap = NULL; 646 } 647 648 static ssize_t backing_dev_show(struct device *dev, 649 struct device_attribute *attr, char *buf) 650 { 651 struct file *file; 652 struct zram *zram = dev_to_zram(dev); 653 char *p; 654 ssize_t ret; 655 656 down_read(&zram->init_lock); 657 file = zram->backing_dev; 658 if (!file) { 659 memcpy(buf, "none\n", 5); 660 up_read(&zram->init_lock); 661 return 5; 662 } 663 664 p = file_path(file, buf, PAGE_SIZE - 1); 665 if (IS_ERR(p)) { 666 ret = PTR_ERR(p); 667 goto out; 668 } 669 670 ret = strlen(p); 671 memmove(buf, p, ret); 672 buf[ret++] = '\n'; 673 out: 674 up_read(&zram->init_lock); 675 return ret; 676 } 677 678 static ssize_t backing_dev_store(struct device *dev, 679 struct device_attribute *attr, const char *buf, size_t len) 680 { 681 char *file_name; 682 size_t sz; 683 struct file *backing_dev = NULL; 684 struct inode *inode; 685 unsigned int bitmap_sz; 686 unsigned long nr_pages, *bitmap = NULL; 687 int err; 688 struct zram *zram = dev_to_zram(dev); 689 690 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 691 if (!file_name) 692 return -ENOMEM; 693 694 down_write(&zram->init_lock); 695 if (init_done(zram)) { 696 pr_info("Can't setup backing device for initialized device\n"); 697 err = -EBUSY; 698 goto out; 699 } 700 701 strscpy(file_name, buf, PATH_MAX); 702 /* ignore trailing newline */ 703 sz = strlen(file_name); 704 if (sz > 0 && file_name[sz - 1] == '\n') 705 file_name[sz - 1] = 0x00; 706 707 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0); 708 if (IS_ERR(backing_dev)) { 709 err = PTR_ERR(backing_dev); 710 backing_dev = NULL; 711 goto out; 712 } 713 714 inode = backing_dev->f_mapping->host; 715 716 /* Support only block device in this moment */ 717 if (!S_ISBLK(inode->i_mode)) { 718 err = -ENOTBLK; 719 goto out; 720 } 721 722 nr_pages = 
i_size_read(inode) >> PAGE_SHIFT; 723 /* Refuse to use zero sized device (also prevents self reference) */ 724 if (!nr_pages) { 725 err = -EINVAL; 726 goto out; 727 } 728 729 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 730 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 731 if (!bitmap) { 732 err = -ENOMEM; 733 goto out; 734 } 735 736 reset_bdev(zram); 737 738 zram->bdev = I_BDEV(inode); 739 zram->backing_dev = backing_dev; 740 zram->bitmap = bitmap; 741 zram->nr_pages = nr_pages; 742 up_write(&zram->init_lock); 743 744 pr_info("setup backing device %s\n", file_name); 745 kfree(file_name); 746 747 return len; 748 out: 749 kvfree(bitmap); 750 751 if (backing_dev) 752 filp_close(backing_dev, NULL); 753 754 up_write(&zram->init_lock); 755 756 kfree(file_name); 757 758 return err; 759 } 760 761 static unsigned long zram_reserve_bdev_block(struct zram *zram) 762 { 763 unsigned long blk_idx; 764 765 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0); 766 if (blk_idx == zram->nr_pages) 767 return INVALID_BDEV_BLOCK; 768 769 set_bit(blk_idx, zram->bitmap); 770 atomic64_inc(&zram->stats.bd_count); 771 return blk_idx; 772 } 773 774 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) 775 { 776 int was_set; 777 778 was_set = test_and_clear_bit(blk_idx, zram->bitmap); 779 WARN_ON_ONCE(!was_set); 780 atomic64_dec(&zram->stats.bd_count); 781 } 782 783 static void read_from_bdev_async(struct zram *zram, struct page *page, 784 unsigned long entry, struct bio *parent) 785 { 786 struct bio *bio; 787 788 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); 789 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 790 __bio_add_page(bio, page, PAGE_SIZE, 0); 791 bio_chain(bio, parent); 792 submit_bio(bio); 793 } 794 795 static void release_wb_req(struct zram_wb_req *req) 796 { 797 __free_page(req->page); 798 kfree(req); 799 } 800 801 static void release_wb_ctl(struct zram_wb_ctl *wb_ctl) 802 { 803 if (!wb_ctl) 804 return; 805 806 /* We should 
never have inflight requests at this point */ 807 WARN_ON(atomic_read(&wb_ctl->num_inflight)); 808 WARN_ON(!list_empty(&wb_ctl->done_reqs)); 809 810 while (!list_empty(&wb_ctl->idle_reqs)) { 811 struct zram_wb_req *req; 812 813 req = list_first_entry(&wb_ctl->idle_reqs, 814 struct zram_wb_req, entry); 815 list_del(&req->entry); 816 release_wb_req(req); 817 } 818 819 kfree(wb_ctl); 820 } 821 822 static struct zram_wb_ctl *init_wb_ctl(struct zram *zram) 823 { 824 struct zram_wb_ctl *wb_ctl; 825 int i; 826 827 wb_ctl = kmalloc(sizeof(*wb_ctl), GFP_KERNEL); 828 if (!wb_ctl) 829 return NULL; 830 831 INIT_LIST_HEAD(&wb_ctl->idle_reqs); 832 INIT_LIST_HEAD(&wb_ctl->done_reqs); 833 atomic_set(&wb_ctl->num_inflight, 0); 834 init_waitqueue_head(&wb_ctl->done_wait); 835 spin_lock_init(&wb_ctl->done_lock); 836 837 for (i = 0; i < zram->wb_batch_size; i++) { 838 struct zram_wb_req *req; 839 840 /* 841 * This is fatal condition only if we couldn't allocate 842 * any requests at all. Otherwise we just work with the 843 * requests that we have successfully allocated, so that 844 * writeback can still proceed, even if there is only one 845 * request on the idle list. 
846 */ 847 req = kzalloc(sizeof(*req), GFP_KERNEL | __GFP_NOWARN); 848 if (!req) 849 break; 850 851 req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN); 852 if (!req->page) { 853 kfree(req); 854 break; 855 } 856 857 list_add(&req->entry, &wb_ctl->idle_reqs); 858 } 859 860 /* We couldn't allocate any requests, so writeabck is not possible */ 861 if (list_empty(&wb_ctl->idle_reqs)) 862 goto release_wb_ctl; 863 864 return wb_ctl; 865 866 release_wb_ctl: 867 release_wb_ctl(wb_ctl); 868 return NULL; 869 } 870 871 static void zram_account_writeback_rollback(struct zram *zram) 872 { 873 lockdep_assert_held_read(&zram->init_lock); 874 875 if (zram->wb_limit_enable) 876 zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12); 877 } 878 879 static void zram_account_writeback_submit(struct zram *zram) 880 { 881 lockdep_assert_held_read(&zram->init_lock); 882 883 if (zram->wb_limit_enable && zram->bd_wb_limit > 0) 884 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); 885 } 886 887 static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) 888 { 889 u32 index = req->pps->index; 890 int err; 891 892 err = blk_status_to_errno(req->bio.bi_status); 893 if (err) { 894 /* 895 * Failed wb requests should not be accounted in wb_limit 896 * (if enabled). 897 */ 898 zram_account_writeback_rollback(zram); 899 zram_release_bdev_block(zram, req->blk_idx); 900 return err; 901 } 902 903 atomic64_inc(&zram->stats.bd_writes); 904 zram_slot_lock(zram, index); 905 /* 906 * We release slot lock during writeback so slot can change under us: 907 * slot_free() or slot_free() and zram_write_page(). In both cases 908 * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can 909 * set ZRAM_PP_SLOT on such slots until current post-processing 910 * finishes. 
911 */ 912 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) { 913 zram_release_bdev_block(zram, req->blk_idx); 914 goto out; 915 } 916 917 zram_free_page(zram, index); 918 zram_set_flag(zram, index, ZRAM_WB); 919 zram_set_handle(zram, index, req->blk_idx); 920 atomic64_inc(&zram->stats.pages_stored); 921 922 out: 923 zram_slot_unlock(zram, index); 924 return 0; 925 } 926 927 static void zram_writeback_endio(struct bio *bio) 928 { 929 struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio); 930 struct zram_wb_ctl *wb_ctl = bio->bi_private; 931 unsigned long flags; 932 933 spin_lock_irqsave(&wb_ctl->done_lock, flags); 934 list_add(&req->entry, &wb_ctl->done_reqs); 935 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 936 937 wake_up(&wb_ctl->done_wait); 938 } 939 940 static void zram_submit_wb_request(struct zram *zram, 941 struct zram_wb_ctl *wb_ctl, 942 struct zram_wb_req *req) 943 { 944 /* 945 * wb_limit (if enabled) should be adjusted before submission, 946 * so that we don't over-submit. 
947 */ 948 zram_account_writeback_submit(zram); 949 atomic_inc(&wb_ctl->num_inflight); 950 req->bio.bi_private = wb_ctl; 951 submit_bio(&req->bio); 952 } 953 954 static int zram_complete_done_reqs(struct zram *zram, 955 struct zram_wb_ctl *wb_ctl) 956 { 957 struct zram_wb_req *req; 958 unsigned long flags; 959 int ret = 0, err; 960 961 while (atomic_read(&wb_ctl->num_inflight) > 0) { 962 spin_lock_irqsave(&wb_ctl->done_lock, flags); 963 req = list_first_entry_or_null(&wb_ctl->done_reqs, 964 struct zram_wb_req, entry); 965 if (req) 966 list_del(&req->entry); 967 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 968 969 /* ->num_inflight > 0 doesn't mean we have done requests */ 970 if (!req) 971 break; 972 973 err = zram_writeback_complete(zram, req); 974 if (err) 975 ret = err; 976 977 atomic_dec(&wb_ctl->num_inflight); 978 release_pp_slot(zram, req->pps); 979 req->pps = NULL; 980 981 list_add(&req->entry, &wb_ctl->idle_reqs); 982 } 983 984 return ret; 985 } 986 987 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl) 988 { 989 struct zram_wb_req *req; 990 991 req = list_first_entry_or_null(&wb_ctl->idle_reqs, 992 struct zram_wb_req, entry); 993 if (req) 994 list_del(&req->entry); 995 return req; 996 } 997 998 static int zram_writeback_slots(struct zram *zram, 999 struct zram_pp_ctl *ctl, 1000 struct zram_wb_ctl *wb_ctl) 1001 { 1002 unsigned long blk_idx = INVALID_BDEV_BLOCK; 1003 struct zram_wb_req *req = NULL; 1004 struct zram_pp_slot *pps; 1005 int ret = 0, err = 0; 1006 u32 index = 0; 1007 1008 while ((pps = select_pp_slot(ctl))) { 1009 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 1010 ret = -EIO; 1011 break; 1012 } 1013 1014 while (!req) { 1015 req = zram_select_idle_req(wb_ctl); 1016 if (req) 1017 break; 1018 1019 wait_event(wb_ctl->done_wait, 1020 !list_empty(&wb_ctl->done_reqs)); 1021 1022 err = zram_complete_done_reqs(zram, wb_ctl); 1023 /* 1024 * BIO errors are not fatal, we continue and simply 1025 * attempt to writeback the 
remaining objects (pages). 1026 * At the same time we need to signal user-space that 1027 * some writes (at least one, but also could be all of 1028 * them) were not successful and we do so by returning 1029 * the most recent BIO error. 1030 */ 1031 if (err) 1032 ret = err; 1033 } 1034 1035 if (blk_idx == INVALID_BDEV_BLOCK) { 1036 blk_idx = zram_reserve_bdev_block(zram); 1037 if (blk_idx == INVALID_BDEV_BLOCK) { 1038 ret = -ENOSPC; 1039 break; 1040 } 1041 } 1042 1043 index = pps->index; 1044 zram_slot_lock(zram, index); 1045 /* 1046 * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so 1047 * slots can change in the meantime. If slots are accessed or 1048 * freed they lose ZRAM_PP_SLOT flag and hence we don't 1049 * post-process them. 1050 */ 1051 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) 1052 goto next; 1053 if (zram_read_from_zspool(zram, req->page, index)) 1054 goto next; 1055 zram_slot_unlock(zram, index); 1056 1057 /* 1058 * From now on pp-slot is owned by the req, remove it from 1059 * its pp bucket. 1060 */ 1061 list_del_init(&pps->entry); 1062 1063 req->blk_idx = blk_idx; 1064 req->pps = pps; 1065 bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE); 1066 req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1067 req->bio.bi_end_io = zram_writeback_endio; 1068 __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0); 1069 1070 zram_submit_wb_request(zram, wb_ctl, req); 1071 blk_idx = INVALID_BDEV_BLOCK; 1072 req = NULL; 1073 cond_resched(); 1074 continue; 1075 1076 next: 1077 zram_slot_unlock(zram, index); 1078 release_pp_slot(zram, pps); 1079 } 1080 1081 /* 1082 * Selected idle req, but never submitted it due to some error or 1083 * wb limit. 
1084 */ 1085 if (req) 1086 release_wb_req(req); 1087 1088 while (atomic_read(&wb_ctl->num_inflight) > 0) { 1089 wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs)); 1090 err = zram_complete_done_reqs(zram, wb_ctl); 1091 if (err) 1092 ret = err; 1093 } 1094 1095 return ret; 1096 } 1097 1098 #define PAGE_WRITEBACK 0 1099 #define HUGE_WRITEBACK (1 << 0) 1100 #define IDLE_WRITEBACK (1 << 1) 1101 #define INCOMPRESSIBLE_WRITEBACK (1 << 2) 1102 1103 static int parse_page_index(char *val, unsigned long nr_pages, 1104 unsigned long *lo, unsigned long *hi) 1105 { 1106 int ret; 1107 1108 ret = kstrtoul(val, 10, lo); 1109 if (ret) 1110 return ret; 1111 if (*lo >= nr_pages) 1112 return -ERANGE; 1113 *hi = *lo + 1; 1114 return 0; 1115 } 1116 1117 static int parse_page_indexes(char *val, unsigned long nr_pages, 1118 unsigned long *lo, unsigned long *hi) 1119 { 1120 char *delim; 1121 int ret; 1122 1123 delim = strchr(val, '-'); 1124 if (!delim) 1125 return -EINVAL; 1126 1127 *delim = 0x00; 1128 ret = kstrtoul(val, 10, lo); 1129 if (ret) 1130 return ret; 1131 if (*lo >= nr_pages) 1132 return -ERANGE; 1133 1134 ret = kstrtoul(delim + 1, 10, hi); 1135 if (ret) 1136 return ret; 1137 if (*hi >= nr_pages || *lo > *hi) 1138 return -ERANGE; 1139 *hi += 1; 1140 return 0; 1141 } 1142 1143 static int parse_mode(char *val, u32 *mode) 1144 { 1145 *mode = 0; 1146 1147 if (!strcmp(val, "idle")) 1148 *mode = IDLE_WRITEBACK; 1149 if (!strcmp(val, "huge")) 1150 *mode = HUGE_WRITEBACK; 1151 if (!strcmp(val, "huge_idle")) 1152 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK; 1153 if (!strcmp(val, "incompressible")) 1154 *mode = INCOMPRESSIBLE_WRITEBACK; 1155 1156 if (*mode == 0) 1157 return -EINVAL; 1158 return 0; 1159 } 1160 1161 static int scan_slots_for_writeback(struct zram *zram, u32 mode, 1162 unsigned long lo, unsigned long hi, 1163 struct zram_pp_ctl *ctl) 1164 { 1165 u32 index = lo; 1166 1167 while (index < hi) { 1168 bool ok = true; 1169 1170 zram_slot_lock(zram, index); 1171 if 
(!zram_allocated(zram, index)) 1172 goto next; 1173 1174 if (zram_test_flag(zram, index, ZRAM_WB) || 1175 zram_test_flag(zram, index, ZRAM_SAME)) 1176 goto next; 1177 1178 if (mode & IDLE_WRITEBACK && 1179 !zram_test_flag(zram, index, ZRAM_IDLE)) 1180 goto next; 1181 if (mode & HUGE_WRITEBACK && 1182 !zram_test_flag(zram, index, ZRAM_HUGE)) 1183 goto next; 1184 if (mode & INCOMPRESSIBLE_WRITEBACK && 1185 !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 1186 goto next; 1187 1188 ok = place_pp_slot(zram, ctl, index); 1189 next: 1190 zram_slot_unlock(zram, index); 1191 if (!ok) 1192 break; 1193 index++; 1194 } 1195 1196 return 0; 1197 } 1198 1199 static ssize_t writeback_store(struct device *dev, 1200 struct device_attribute *attr, 1201 const char *buf, size_t len) 1202 { 1203 struct zram *zram = dev_to_zram(dev); 1204 u64 nr_pages = zram->disksize >> PAGE_SHIFT; 1205 unsigned long lo = 0, hi = nr_pages; 1206 struct zram_pp_ctl *pp_ctl = NULL; 1207 struct zram_wb_ctl *wb_ctl = NULL; 1208 char *args, *param, *val; 1209 ssize_t ret = len; 1210 int err, mode = 0; 1211 1212 down_read(&zram->init_lock); 1213 if (!init_done(zram)) { 1214 up_read(&zram->init_lock); 1215 return -EINVAL; 1216 } 1217 1218 /* Do not permit concurrent post-processing actions. */ 1219 if (atomic_xchg(&zram->pp_in_progress, 1)) { 1220 up_read(&zram->init_lock); 1221 return -EAGAIN; 1222 } 1223 1224 if (!zram->backing_dev) { 1225 ret = -ENODEV; 1226 goto release_init_lock; 1227 } 1228 1229 pp_ctl = init_pp_ctl(); 1230 if (!pp_ctl) { 1231 ret = -ENOMEM; 1232 goto release_init_lock; 1233 } 1234 1235 wb_ctl = init_wb_ctl(zram); 1236 if (!wb_ctl) { 1237 ret = -ENOMEM; 1238 goto release_init_lock; 1239 } 1240 1241 args = skip_spaces(buf); 1242 while (*args) { 1243 args = next_arg(args, ¶m, &val); 1244 1245 /* 1246 * Workaround to support the old writeback interface. 
1247 * 1248 * The old writeback interface has a minor inconsistency and 1249 * requires key=value only for page_index parameter, while the 1250 * writeback mode is a valueless parameter. 1251 * 1252 * This is not the case anymore and now all parameters are 1253 * required to have values, however, we need to support the 1254 * legacy writeback interface format so we check if we can 1255 * recognize a valueless parameter as the (legacy) writeback 1256 * mode. 1257 */ 1258 if (!val || !*val) { 1259 err = parse_mode(param, &mode); 1260 if (err) { 1261 ret = err; 1262 goto release_init_lock; 1263 } 1264 1265 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1266 break; 1267 } 1268 1269 if (!strcmp(param, "type")) { 1270 err = parse_mode(val, &mode); 1271 if (err) { 1272 ret = err; 1273 goto release_init_lock; 1274 } 1275 1276 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1277 break; 1278 } 1279 1280 if (!strcmp(param, "page_index")) { 1281 err = parse_page_index(val, nr_pages, &lo, &hi); 1282 if (err) { 1283 ret = err; 1284 goto release_init_lock; 1285 } 1286 1287 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1288 continue; 1289 } 1290 1291 if (!strcmp(param, "page_indexes")) { 1292 err = parse_page_indexes(val, nr_pages, &lo, &hi); 1293 if (err) { 1294 ret = err; 1295 goto release_init_lock; 1296 } 1297 1298 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1299 continue; 1300 } 1301 } 1302 1303 err = zram_writeback_slots(zram, pp_ctl, wb_ctl); 1304 if (err) 1305 ret = err; 1306 1307 release_init_lock: 1308 release_pp_ctl(zram, pp_ctl); 1309 release_wb_ctl(wb_ctl); 1310 atomic_set(&zram->pp_in_progress, 0); 1311 up_read(&zram->init_lock); 1312 1313 return ret; 1314 } 1315 1316 struct zram_work { 1317 struct work_struct work; 1318 struct zram *zram; 1319 unsigned long entry; 1320 struct page *page; 1321 int error; 1322 }; 1323 1324 static void zram_sync_read(struct work_struct *work) 1325 { 1326 struct zram_work *zw = container_of(work, struct 
zram_work, work); 1327 struct bio_vec bv; 1328 struct bio bio; 1329 1330 bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); 1331 bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); 1332 __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); 1333 zw->error = submit_bio_wait(&bio); 1334 } 1335 1336 /* 1337 * Block layer want one ->submit_bio to be active at a time, so if we use 1338 * chained IO with parent IO in same context, it's a deadlock. To avoid that, 1339 * use a worker thread context. 1340 */ 1341 static int read_from_bdev_sync(struct zram *zram, struct page *page, 1342 unsigned long entry) 1343 { 1344 struct zram_work work; 1345 1346 work.page = page; 1347 work.zram = zram; 1348 work.entry = entry; 1349 1350 INIT_WORK_ONSTACK(&work.work, zram_sync_read); 1351 queue_work(system_dfl_wq, &work.work); 1352 flush_work(&work.work); 1353 destroy_work_on_stack(&work.work); 1354 1355 return work.error; 1356 } 1357 1358 static int read_from_bdev(struct zram *zram, struct page *page, 1359 unsigned long entry, struct bio *parent) 1360 { 1361 atomic64_inc(&zram->stats.bd_reads); 1362 if (!parent) { 1363 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) 1364 return -EIO; 1365 return read_from_bdev_sync(zram, page, entry); 1366 } 1367 read_from_bdev_async(zram, page, entry, parent); 1368 return 0; 1369 } 1370 #else 1371 static inline void reset_bdev(struct zram *zram) {}; 1372 static int read_from_bdev(struct zram *zram, struct page *page, 1373 unsigned long entry, struct bio *parent) 1374 { 1375 return -EIO; 1376 } 1377 1378 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) 1379 { 1380 } 1381 #endif 1382 1383 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 1384 1385 static struct dentry *zram_debugfs_root; 1386 1387 static void zram_debugfs_create(void) 1388 { 1389 zram_debugfs_root = debugfs_create_dir("zram", NULL); 1390 } 1391 1392 static void zram_debugfs_destroy(void) 1393 { 1394 debugfs_remove_recursive(zram_debugfs_root); 1395 } 1396 1397 static ssize_t 
read_block_state(struct file *file, char __user *buf, 1398 size_t count, loff_t *ppos) 1399 { 1400 char *kbuf; 1401 ssize_t index, written = 0; 1402 struct zram *zram = file->private_data; 1403 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 1404 struct timespec64 ts; 1405 1406 kbuf = kvmalloc(count, GFP_KERNEL); 1407 if (!kbuf) 1408 return -ENOMEM; 1409 1410 down_read(&zram->init_lock); 1411 if (!init_done(zram)) { 1412 up_read(&zram->init_lock); 1413 kvfree(kbuf); 1414 return -EINVAL; 1415 } 1416 1417 for (index = *ppos; index < nr_pages; index++) { 1418 int copied; 1419 1420 zram_slot_lock(zram, index); 1421 if (!zram_allocated(zram, index)) 1422 goto next; 1423 1424 ts = ktime_to_timespec64(zram->table[index].ac_time); 1425 copied = snprintf(kbuf + written, count, 1426 "%12zd %12lld.%06lu %c%c%c%c%c%c\n", 1427 index, (s64)ts.tv_sec, 1428 ts.tv_nsec / NSEC_PER_USEC, 1429 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', 1430 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', 1431 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', 1432 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', 1433 zram_get_priority(zram, index) ? 'r' : '.', 1434 zram_test_flag(zram, index, 1435 ZRAM_INCOMPRESSIBLE) ? 
'n' : '.'); 1436 1437 if (count <= copied) { 1438 zram_slot_unlock(zram, index); 1439 break; 1440 } 1441 written += copied; 1442 count -= copied; 1443 next: 1444 zram_slot_unlock(zram, index); 1445 *ppos += 1; 1446 } 1447 1448 up_read(&zram->init_lock); 1449 if (copy_to_user(buf, kbuf, written)) 1450 written = -EFAULT; 1451 kvfree(kbuf); 1452 1453 return written; 1454 } 1455 1456 static const struct file_operations proc_zram_block_state_op = { 1457 .open = simple_open, 1458 .read = read_block_state, 1459 .llseek = default_llseek, 1460 }; 1461 1462 static void zram_debugfs_register(struct zram *zram) 1463 { 1464 if (!zram_debugfs_root) 1465 return; 1466 1467 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, 1468 zram_debugfs_root); 1469 debugfs_create_file("block_state", 0400, zram->debugfs_dir, 1470 zram, &proc_zram_block_state_op); 1471 } 1472 1473 static void zram_debugfs_unregister(struct zram *zram) 1474 { 1475 debugfs_remove_recursive(zram->debugfs_dir); 1476 } 1477 #else 1478 static void zram_debugfs_create(void) {}; 1479 static void zram_debugfs_destroy(void) {}; 1480 static void zram_debugfs_register(struct zram *zram) {}; 1481 static void zram_debugfs_unregister(struct zram *zram) {}; 1482 #endif 1483 1484 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) 1485 { 1486 /* Do not free statically defined compression algorithms */ 1487 if (zram->comp_algs[prio] != default_compressor) 1488 kfree(zram->comp_algs[prio]); 1489 1490 zram->comp_algs[prio] = alg; 1491 } 1492 1493 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) 1494 { 1495 char *compressor; 1496 size_t sz; 1497 1498 sz = strlen(buf); 1499 if (sz >= ZRAM_MAX_ALGO_NAME_SZ) 1500 return -E2BIG; 1501 1502 compressor = kstrdup(buf, GFP_KERNEL); 1503 if (!compressor) 1504 return -ENOMEM; 1505 1506 /* ignore trailing newline */ 1507 if (sz > 0 && compressor[sz - 1] == '\n') 1508 compressor[sz - 1] = 0x00; 1509 1510 if 
(!zcomp_available_algorithm(compressor)) { 1511 kfree(compressor); 1512 return -EINVAL; 1513 } 1514 1515 down_write(&zram->init_lock); 1516 if (init_done(zram)) { 1517 up_write(&zram->init_lock); 1518 kfree(compressor); 1519 pr_info("Can't change algorithm for initialized device\n"); 1520 return -EBUSY; 1521 } 1522 1523 comp_algorithm_set(zram, prio, compressor); 1524 up_write(&zram->init_lock); 1525 return 0; 1526 } 1527 1528 static void comp_params_reset(struct zram *zram, u32 prio) 1529 { 1530 struct zcomp_params *params = &zram->params[prio]; 1531 1532 vfree(params->dict); 1533 params->level = ZCOMP_PARAM_NOT_SET; 1534 params->deflate.winbits = ZCOMP_PARAM_NOT_SET; 1535 params->dict_sz = 0; 1536 params->dict = NULL; 1537 } 1538 1539 static int comp_params_store(struct zram *zram, u32 prio, s32 level, 1540 const char *dict_path, 1541 struct deflate_params *deflate_params) 1542 { 1543 ssize_t sz = 0; 1544 1545 comp_params_reset(zram, prio); 1546 1547 if (dict_path) { 1548 sz = kernel_read_file_from_path(dict_path, 0, 1549 &zram->params[prio].dict, 1550 INT_MAX, 1551 NULL, 1552 READING_POLICY); 1553 if (sz < 0) 1554 return -EINVAL; 1555 } 1556 1557 zram->params[prio].dict_sz = sz; 1558 zram->params[prio].level = level; 1559 zram->params[prio].deflate.winbits = deflate_params->winbits; 1560 return 0; 1561 } 1562 1563 static ssize_t algorithm_params_store(struct device *dev, 1564 struct device_attribute *attr, 1565 const char *buf, 1566 size_t len) 1567 { 1568 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET; 1569 char *args, *param, *val, *algo = NULL, *dict_path = NULL; 1570 struct deflate_params deflate_params; 1571 struct zram *zram = dev_to_zram(dev); 1572 int ret; 1573 1574 deflate_params.winbits = ZCOMP_PARAM_NOT_SET; 1575 1576 args = skip_spaces(buf); 1577 while (*args) { 1578 args = next_arg(args, ¶m, &val); 1579 1580 if (!val || !*val) 1581 return -EINVAL; 1582 1583 if (!strcmp(param, "priority")) { 1584 ret = kstrtoint(val, 10, &prio); 1585 if 
(ret) 1586 return ret; 1587 continue; 1588 } 1589 1590 if (!strcmp(param, "level")) { 1591 ret = kstrtoint(val, 10, &level); 1592 if (ret) 1593 return ret; 1594 continue; 1595 } 1596 1597 if (!strcmp(param, "algo")) { 1598 algo = val; 1599 continue; 1600 } 1601 1602 if (!strcmp(param, "dict")) { 1603 dict_path = val; 1604 continue; 1605 } 1606 1607 if (!strcmp(param, "deflate.winbits")) { 1608 ret = kstrtoint(val, 10, &deflate_params.winbits); 1609 if (ret) 1610 return ret; 1611 continue; 1612 } 1613 } 1614 1615 /* Lookup priority by algorithm name */ 1616 if (algo) { 1617 s32 p; 1618 1619 prio = -EINVAL; 1620 for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) { 1621 if (!zram->comp_algs[p]) 1622 continue; 1623 1624 if (!strcmp(zram->comp_algs[p], algo)) { 1625 prio = p; 1626 break; 1627 } 1628 } 1629 } 1630 1631 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS) 1632 return -EINVAL; 1633 1634 ret = comp_params_store(zram, prio, level, dict_path, &deflate_params); 1635 return ret ? ret : len; 1636 } 1637 1638 static ssize_t comp_algorithm_show(struct device *dev, 1639 struct device_attribute *attr, 1640 char *buf) 1641 { 1642 struct zram *zram = dev_to_zram(dev); 1643 ssize_t sz; 1644 1645 down_read(&zram->init_lock); 1646 sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0); 1647 up_read(&zram->init_lock); 1648 return sz; 1649 } 1650 1651 static ssize_t comp_algorithm_store(struct device *dev, 1652 struct device_attribute *attr, 1653 const char *buf, 1654 size_t len) 1655 { 1656 struct zram *zram = dev_to_zram(dev); 1657 int ret; 1658 1659 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf); 1660 return ret ? 
ret : len; 1661 } 1662 1663 #ifdef CONFIG_ZRAM_MULTI_COMP 1664 static ssize_t recomp_algorithm_show(struct device *dev, 1665 struct device_attribute *attr, 1666 char *buf) 1667 { 1668 struct zram *zram = dev_to_zram(dev); 1669 ssize_t sz = 0; 1670 u32 prio; 1671 1672 down_read(&zram->init_lock); 1673 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 1674 if (!zram->comp_algs[prio]) 1675 continue; 1676 1677 sz += sysfs_emit_at(buf, sz, "#%d: ", prio); 1678 sz += zcomp_available_show(zram->comp_algs[prio], buf, sz); 1679 } 1680 up_read(&zram->init_lock); 1681 return sz; 1682 } 1683 1684 static ssize_t recomp_algorithm_store(struct device *dev, 1685 struct device_attribute *attr, 1686 const char *buf, 1687 size_t len) 1688 { 1689 struct zram *zram = dev_to_zram(dev); 1690 int prio = ZRAM_SECONDARY_COMP; 1691 char *args, *param, *val; 1692 char *alg = NULL; 1693 int ret; 1694 1695 args = skip_spaces(buf); 1696 while (*args) { 1697 args = next_arg(args, ¶m, &val); 1698 1699 if (!val || !*val) 1700 return -EINVAL; 1701 1702 if (!strcmp(param, "algo")) { 1703 alg = val; 1704 continue; 1705 } 1706 1707 if (!strcmp(param, "priority")) { 1708 ret = kstrtoint(val, 10, &prio); 1709 if (ret) 1710 return ret; 1711 continue; 1712 } 1713 } 1714 1715 if (!alg) 1716 return -EINVAL; 1717 1718 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) 1719 return -EINVAL; 1720 1721 ret = __comp_algorithm_store(zram, prio, alg); 1722 return ret ? 
ret : len; 1723 } 1724 #endif 1725 1726 static ssize_t compact_store(struct device *dev, 1727 struct device_attribute *attr, const char *buf, size_t len) 1728 { 1729 struct zram *zram = dev_to_zram(dev); 1730 1731 down_read(&zram->init_lock); 1732 if (!init_done(zram)) { 1733 up_read(&zram->init_lock); 1734 return -EINVAL; 1735 } 1736 1737 zs_compact(zram->mem_pool); 1738 up_read(&zram->init_lock); 1739 1740 return len; 1741 } 1742 1743 static ssize_t io_stat_show(struct device *dev, 1744 struct device_attribute *attr, char *buf) 1745 { 1746 struct zram *zram = dev_to_zram(dev); 1747 ssize_t ret; 1748 1749 down_read(&zram->init_lock); 1750 ret = sysfs_emit(buf, 1751 "%8llu %8llu 0 %8llu\n", 1752 (u64)atomic64_read(&zram->stats.failed_reads), 1753 (u64)atomic64_read(&zram->stats.failed_writes), 1754 (u64)atomic64_read(&zram->stats.notify_free)); 1755 up_read(&zram->init_lock); 1756 1757 return ret; 1758 } 1759 1760 static ssize_t mm_stat_show(struct device *dev, 1761 struct device_attribute *attr, char *buf) 1762 { 1763 struct zram *zram = dev_to_zram(dev); 1764 struct zs_pool_stats pool_stats; 1765 u64 orig_size, mem_used = 0; 1766 long max_used; 1767 ssize_t ret; 1768 1769 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 1770 1771 down_read(&zram->init_lock); 1772 if (init_done(zram)) { 1773 mem_used = zs_get_total_pages(zram->mem_pool); 1774 zs_pool_stats(zram->mem_pool, &pool_stats); 1775 } 1776 1777 orig_size = atomic64_read(&zram->stats.pages_stored); 1778 max_used = atomic_long_read(&zram->stats.max_used_pages); 1779 1780 ret = sysfs_emit(buf, 1781 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", 1782 orig_size << PAGE_SHIFT, 1783 (u64)atomic64_read(&zram->stats.compr_data_size), 1784 mem_used << PAGE_SHIFT, 1785 zram->limit_pages << PAGE_SHIFT, 1786 max_used << PAGE_SHIFT, 1787 (u64)atomic64_read(&zram->stats.same_pages), 1788 atomic_long_read(&pool_stats.pages_compacted), 1789 (u64)atomic64_read(&zram->stats.huge_pages), 1790 
(u64)atomic64_read(&zram->stats.huge_pages_since)); 1791 up_read(&zram->init_lock); 1792 1793 return ret; 1794 } 1795 1796 #ifdef CONFIG_ZRAM_WRITEBACK 1797 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 1798 static ssize_t bd_stat_show(struct device *dev, 1799 struct device_attribute *attr, char *buf) 1800 { 1801 struct zram *zram = dev_to_zram(dev); 1802 ssize_t ret; 1803 1804 down_read(&zram->init_lock); 1805 ret = sysfs_emit(buf, 1806 "%8llu %8llu %8llu\n", 1807 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 1808 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), 1809 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); 1810 up_read(&zram->init_lock); 1811 1812 return ret; 1813 } 1814 #endif 1815 1816 static ssize_t debug_stat_show(struct device *dev, 1817 struct device_attribute *attr, char *buf) 1818 { 1819 int version = 1; 1820 struct zram *zram = dev_to_zram(dev); 1821 ssize_t ret; 1822 1823 down_read(&zram->init_lock); 1824 ret = sysfs_emit(buf, 1825 "version: %d\n0 %8llu\n", 1826 version, 1827 (u64)atomic64_read(&zram->stats.miss_free)); 1828 up_read(&zram->init_lock); 1829 1830 return ret; 1831 } 1832 1833 static DEVICE_ATTR_RO(io_stat); 1834 static DEVICE_ATTR_RO(mm_stat); 1835 #ifdef CONFIG_ZRAM_WRITEBACK 1836 static DEVICE_ATTR_RO(bd_stat); 1837 #endif 1838 static DEVICE_ATTR_RO(debug_stat); 1839 1840 static void zram_meta_free(struct zram *zram, u64 disksize) 1841 { 1842 size_t num_pages = disksize >> PAGE_SHIFT; 1843 size_t index; 1844 1845 if (!zram->table) 1846 return; 1847 1848 /* Free all pages that are still in this zram device */ 1849 for (index = 0; index < num_pages; index++) 1850 zram_free_page(zram, index); 1851 1852 zs_destroy_pool(zram->mem_pool); 1853 vfree(zram->table); 1854 zram->table = NULL; 1855 } 1856 1857 static bool zram_meta_alloc(struct zram *zram, u64 disksize) 1858 { 1859 size_t num_pages, index; 1860 1861 num_pages = disksize >> PAGE_SHIFT; 1862 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); 
/*
 * Reset slot @index to the empty state and release whatever backs it.
 *
 * Clears the per-slot post-processing state (IDLE, INCOMPRESSIBLE, PP_SLOT
 * flags and the priority), then releases the backing storage according to
 * the slot type: a backing device block for ZRAM_WB slots, nothing for
 * ZRAM_SAME slots, a zsmalloc object otherwise. Statistics are adjusted to
 * match.
 *
 * The write paths in this file call this with the slot lock held;
 * zram_meta_free() calls it without taking the lock.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].ac_time = 0;
#endif

	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
	zram_clear_flag(zram, index, ZRAM_PP_SLOT);
	zram_set_priority(zram, index, 0);

	/* ZRAM_HUGE can be combined with the cases below, so no early exit. */
	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	/*
	 * Written-back slot: the handle is passed to
	 * zram_release_bdev_block() as the block index to release.
	 */
	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		zram_release_bdev_block(zram, zram_get_handle(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	/* No handle: nothing is stored here, so pages_stored is untouched. */
	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
		     &zram->stats.compr_data_size);
out:
	/* Common tail for every slot that actually held a page. */
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}
needed) a page from zspool (zsmalloc). 1982 * Corresponding ZRAM slot should be locked. 1983 */ 1984 static int zram_read_from_zspool(struct zram *zram, struct page *page, 1985 u32 index) 1986 { 1987 if (zram_test_flag(zram, index, ZRAM_SAME) || 1988 !zram_get_handle(zram, index)) 1989 return read_same_filled_page(zram, page, index); 1990 1991 if (!zram_test_flag(zram, index, ZRAM_HUGE)) 1992 return read_compressed_page(zram, page, index); 1993 else 1994 return read_incompressible_page(zram, page, index); 1995 } 1996 1997 static int zram_read_page(struct zram *zram, struct page *page, u32 index, 1998 struct bio *parent) 1999 { 2000 int ret; 2001 2002 zram_slot_lock(zram, index); 2003 if (!zram_test_flag(zram, index, ZRAM_WB)) { 2004 /* Slot should be locked through out the function call */ 2005 ret = zram_read_from_zspool(zram, page, index); 2006 zram_slot_unlock(zram, index); 2007 } else { 2008 unsigned long blk_idx = zram_get_handle(zram, index); 2009 2010 /* 2011 * The slot should be unlocked before reading from the backing 2012 * device. 2013 */ 2014 zram_slot_unlock(zram, index); 2015 ret = read_from_bdev(zram, page, blk_idx, parent); 2016 } 2017 2018 /* Should NEVER happen. Return bio error if it does. */ 2019 if (WARN_ON(ret < 0)) 2020 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 2021 2022 return ret; 2023 } 2024 2025 /* 2026 * Use a temporary buffer to decompress the page, as the decompressor 2027 * always expects a full page for the output. 
2028 */ 2029 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec, 2030 u32 index, int offset) 2031 { 2032 struct page *page = alloc_page(GFP_NOIO); 2033 int ret; 2034 2035 if (!page) 2036 return -ENOMEM; 2037 ret = zram_read_page(zram, page, index, NULL); 2038 if (likely(!ret)) 2039 memcpy_to_bvec(bvec, page_address(page) + offset); 2040 __free_page(page); 2041 return ret; 2042 } 2043 2044 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 2045 u32 index, int offset, struct bio *bio) 2046 { 2047 if (is_partial_io(bvec)) 2048 return zram_bvec_read_partial(zram, bvec, index, offset); 2049 return zram_read_page(zram, bvec->bv_page, index, bio); 2050 } 2051 2052 static int write_same_filled_page(struct zram *zram, unsigned long fill, 2053 u32 index) 2054 { 2055 zram_slot_lock(zram, index); 2056 zram_free_page(zram, index); 2057 zram_set_flag(zram, index, ZRAM_SAME); 2058 zram_set_handle(zram, index, fill); 2059 zram_slot_unlock(zram, index); 2060 2061 atomic64_inc(&zram->stats.same_pages); 2062 atomic64_inc(&zram->stats.pages_stored); 2063 2064 return 0; 2065 } 2066 2067 static int write_incompressible_page(struct zram *zram, struct page *page, 2068 u32 index) 2069 { 2070 unsigned long handle; 2071 void *src; 2072 2073 /* 2074 * This function is called from preemptible context so we don't need 2075 * to do optimistic and fallback to pessimistic handle allocation, 2076 * like we do for compressible pages. 
2077 */ 2078 handle = zs_malloc(zram->mem_pool, PAGE_SIZE, 2079 GFP_NOIO | __GFP_NOWARN | 2080 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); 2081 if (IS_ERR_VALUE(handle)) 2082 return PTR_ERR((void *)handle); 2083 2084 if (!zram_can_store_page(zram)) { 2085 zs_free(zram->mem_pool, handle); 2086 return -ENOMEM; 2087 } 2088 2089 src = kmap_local_page(page); 2090 zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE); 2091 kunmap_local(src); 2092 2093 zram_slot_lock(zram, index); 2094 zram_free_page(zram, index); 2095 zram_set_flag(zram, index, ZRAM_HUGE); 2096 zram_set_handle(zram, index, handle); 2097 zram_set_obj_size(zram, index, PAGE_SIZE); 2098 zram_slot_unlock(zram, index); 2099 2100 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size); 2101 atomic64_inc(&zram->stats.huge_pages); 2102 atomic64_inc(&zram->stats.huge_pages_since); 2103 atomic64_inc(&zram->stats.pages_stored); 2104 2105 return 0; 2106 } 2107 2108 static int zram_write_page(struct zram *zram, struct page *page, u32 index) 2109 { 2110 int ret = 0; 2111 unsigned long handle; 2112 unsigned int comp_len; 2113 void *mem; 2114 struct zcomp_strm *zstrm; 2115 unsigned long element; 2116 bool same_filled; 2117 2118 mem = kmap_local_page(page); 2119 same_filled = page_same_filled(mem, &element); 2120 kunmap_local(mem); 2121 if (same_filled) 2122 return write_same_filled_page(zram, element, index); 2123 2124 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); 2125 mem = kmap_local_page(page); 2126 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm, 2127 mem, &comp_len); 2128 kunmap_local(mem); 2129 2130 if (unlikely(ret)) { 2131 zcomp_stream_put(zstrm); 2132 pr_err("Compression failed! 
err=%d\n", ret); 2133 return ret; 2134 } 2135 2136 if (comp_len >= huge_class_size) { 2137 zcomp_stream_put(zstrm); 2138 return write_incompressible_page(zram, page, index); 2139 } 2140 2141 handle = zs_malloc(zram->mem_pool, comp_len, 2142 GFP_NOIO | __GFP_NOWARN | 2143 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); 2144 if (IS_ERR_VALUE(handle)) { 2145 zcomp_stream_put(zstrm); 2146 return PTR_ERR((void *)handle); 2147 } 2148 2149 if (!zram_can_store_page(zram)) { 2150 zcomp_stream_put(zstrm); 2151 zs_free(zram->mem_pool, handle); 2152 return -ENOMEM; 2153 } 2154 2155 zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len); 2156 zcomp_stream_put(zstrm); 2157 2158 zram_slot_lock(zram, index); 2159 zram_free_page(zram, index); 2160 zram_set_handle(zram, index, handle); 2161 zram_set_obj_size(zram, index, comp_len); 2162 zram_slot_unlock(zram, index); 2163 2164 /* Update stats */ 2165 atomic64_inc(&zram->stats.pages_stored); 2166 atomic64_add(comp_len, &zram->stats.compr_data_size); 2167 2168 return ret; 2169 } 2170 2171 /* 2172 * This is a partial IO. Read the full page before writing the changes. 
2173 */ 2174 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, 2175 u32 index, int offset, struct bio *bio) 2176 { 2177 struct page *page = alloc_page(GFP_NOIO); 2178 int ret; 2179 2180 if (!page) 2181 return -ENOMEM; 2182 2183 ret = zram_read_page(zram, page, index, bio); 2184 if (!ret) { 2185 memcpy_from_bvec(page_address(page) + offset, bvec); 2186 ret = zram_write_page(zram, page, index); 2187 } 2188 __free_page(page); 2189 return ret; 2190 } 2191 2192 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 2193 u32 index, int offset, struct bio *bio) 2194 { 2195 if (is_partial_io(bvec)) 2196 return zram_bvec_write_partial(zram, bvec, index, offset, bio); 2197 return zram_write_page(zram, bvec->bv_page, index); 2198 } 2199 2200 #ifdef CONFIG_ZRAM_MULTI_COMP 2201 #define RECOMPRESS_IDLE (1 << 0) 2202 #define RECOMPRESS_HUGE (1 << 1) 2203 2204 static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, 2205 struct zram_pp_ctl *ctl) 2206 { 2207 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 2208 unsigned long index; 2209 2210 for (index = 0; index < nr_pages; index++) { 2211 bool ok = true; 2212 2213 zram_slot_lock(zram, index); 2214 if (!zram_allocated(zram, index)) 2215 goto next; 2216 2217 if (mode & RECOMPRESS_IDLE && 2218 !zram_test_flag(zram, index, ZRAM_IDLE)) 2219 goto next; 2220 2221 if (mode & RECOMPRESS_HUGE && 2222 !zram_test_flag(zram, index, ZRAM_HUGE)) 2223 goto next; 2224 2225 if (zram_test_flag(zram, index, ZRAM_WB) || 2226 zram_test_flag(zram, index, ZRAM_SAME) || 2227 zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 2228 goto next; 2229 2230 /* Already compressed with same of higher priority */ 2231 if (zram_get_priority(zram, index) + 1 >= prio_max) 2232 goto next; 2233 2234 ok = place_pp_slot(zram, ctl, index); 2235 next: 2236 zram_slot_unlock(zram, index); 2237 if (!ok) 2238 break; 2239 } 2240 2241 return 0; 2242 } 2243 2244 /* 2245 * This function will decompress (unless it's 
/*
 * Try to shrink the object stored in slot @index by compressing it again
 * with a secondary algorithm.
 *
 * The slot is first read into @page via zram_read_from_zspool(), then
 * compressed with each configured algorithm from @prio (or one above the
 * slot's current priority, whichever is higher) up to, but not including,
 * @prio_max. The first result that lands in a smaller zsmalloc size class
 * and, if @threshold is set, is smaller than @threshold replaces the old
 * object.
 *
 * @zram:             device the slot belongs to
 * @index:            slot to recompress
 * @page:             scratch page that receives the uncompressed data
 * @num_recomp_pages: remaining page budget; decremented when non-zero once
 *                    a recompression attempt has been made
 * @threshold:        objects smaller than this are left alone
 * @prio:             lowest algorithm priority to try
 * @prio_max:         exclusive upper bound for the priority
 *
 * Corresponding ZRAM slot should be locked.
 *
 * Returns 0 when the slot was recompressed or there was nothing to gain,
 * -EINVAL when the slot has no handle, or the error returned by the read,
 * the compressor or zs_malloc().
 */
static int recompress_slot(struct zram *zram, u32 index, struct page *page,
			   u64 *num_recomp_pages, u32 threshold, u32 prio,
			   u32 prio_max)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	void *src;
	int ret = 0;

	handle_old = zram_get_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = zram_get_obj_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = zram_read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	/*
	 * We touched this entry so mark it as non-IDLE. This makes sure that
	 * we don't preserve IDLE flag and don't incorrectly pick this entry
	 * for different post-processing type (e.g. writeback).
	 */
	zram_clear_flag(zram, index, ZRAM_IDLE);

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);

	/* Never retry the algorithm the slot is already compressed with. */
	prio = max(prio, zram_get_priority(zram, index) + 1);
	/*
	 * Recompression slots scan should not select slots that are
	 * already compressed with a higher priority algorithm, but
	 * just in case
	 */
	if (prio >= prio_max)
		return 0;

	/*
	 * Iterate the secondary comp algorithms list (in order of priority)
	 * and try to recompress the page.
	 *
	 * On exit, a non-NULL zstrm means the last attempt succeeded and its
	 * stream (holding the new data) is still held.
	 */
	for (; prio < prio_max; prio++) {
		if (!zram->comps[prio])
			continue;

		zstrm = zcomp_stream_get(zram->comps[prio]);
		src = kmap_local_page(page);
		ret = zcomp_compress(zram->comps[prio], zstrm,
				     src, &comp_len_new);
		kunmap_local(src);

		if (ret) {
			zcomp_stream_put(zstrm);
			zstrm = NULL;
			break;
		}

		class_index_new = zs_lookup_class_index(zram->mem_pool,
							comp_len_new);

		/* Continue until we make progress */
		if (class_index_new >= class_index_old ||
		    (threshold && comp_len_new >= threshold)) {
			zcomp_stream_put(zstrm);
			zstrm = NULL;
			continue;
		}

		/* Recompression was successful so break out */
		break;
	}

	/*
	 * Decrement the limit (if set) on pages we can recompress, even
	 * when current recompression was unsuccessful or did not compress
	 * the page below the threshold, because we still spent resources
	 * on it.
	 */
	if (*num_recomp_pages)
		*num_recomp_pages -= 1;

	/* Compression error */
	if (ret)
		return ret;

	if (!zstrm) {
		/*
		 * Secondary algorithms failed to re-compress the page
		 * in a way that would save memory.
		 *
		 * Mark the object incompressible if the max-priority
		 * algorithm couldn't re-compress it.
		 */
		if (prio < zram->num_active_comps)
			return 0;
		zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/*
	 * We are holding per-CPU stream mutex and entry lock so better
	 * avoid direct reclaim. Allocation error is not fatal since
	 * we still have the old object in the mem_pool.
	 *
	 * XXX: technically, the node we really want here is the node that holds
	 * the original compressed data. But that would require us to modify
	 * zsmalloc API to return this information. For now, we will make do with
	 * the node of the page allocated for recompression.
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       GFP_NOIO | __GFP_NOWARN |
			       __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle_new);
	}

	/* Copy the new data out of the stream buffer before releasing it. */
	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zstrm);

	/*
	 * zram_free_page() drops the old object and its accounting; the new
	 * object is then installed and accounted for below.
	 */
	zram_free_page(zram, index);
	zram_set_handle(zram, index, handle_new);
	zram_set_obj_size(zram, index, comp_len_new);
	zram_set_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}
2430 */ 2431 ret = kstrtoull(val, 10, &num_recomp_pages); 2432 if (ret) 2433 return ret; 2434 continue; 2435 } 2436 2437 if (!strcmp(param, "threshold")) { 2438 /* 2439 * We will re-compress only idle objects equal or 2440 * greater in size than watermark. 2441 */ 2442 ret = kstrtouint(val, 10, &threshold); 2443 if (ret) 2444 return ret; 2445 continue; 2446 } 2447 2448 if (!strcmp(param, "algo")) { 2449 algo = val; 2450 continue; 2451 } 2452 2453 if (!strcmp(param, "priority")) { 2454 ret = kstrtouint(val, 10, &prio); 2455 if (ret) 2456 return ret; 2457 2458 if (prio == ZRAM_PRIMARY_COMP) 2459 prio = ZRAM_SECONDARY_COMP; 2460 2461 prio_max = prio + 1; 2462 continue; 2463 } 2464 } 2465 2466 if (threshold >= huge_class_size) 2467 return -EINVAL; 2468 2469 down_read(&zram->init_lock); 2470 if (!init_done(zram)) { 2471 ret = -EINVAL; 2472 goto release_init_lock; 2473 } 2474 2475 /* Do not permit concurrent post-processing actions. */ 2476 if (atomic_xchg(&zram->pp_in_progress, 1)) { 2477 up_read(&zram->init_lock); 2478 return -EAGAIN; 2479 } 2480 2481 if (algo) { 2482 bool found = false; 2483 2484 for (; prio < ZRAM_MAX_COMPS; prio++) { 2485 if (!zram->comp_algs[prio]) 2486 continue; 2487 2488 if (!strcmp(zram->comp_algs[prio], algo)) { 2489 prio_max = prio + 1; 2490 found = true; 2491 break; 2492 } 2493 } 2494 2495 if (!found) { 2496 ret = -EINVAL; 2497 goto release_init_lock; 2498 } 2499 } 2500 2501 prio_max = min(prio_max, (u32)zram->num_active_comps); 2502 if (prio >= prio_max) { 2503 ret = -EINVAL; 2504 goto release_init_lock; 2505 } 2506 2507 page = alloc_page(GFP_KERNEL); 2508 if (!page) { 2509 ret = -ENOMEM; 2510 goto release_init_lock; 2511 } 2512 2513 ctl = init_pp_ctl(); 2514 if (!ctl) { 2515 ret = -ENOMEM; 2516 goto release_init_lock; 2517 } 2518 2519 scan_slots_for_recompress(zram, mode, prio_max, ctl); 2520 2521 ret = len; 2522 while ((pps = select_pp_slot(ctl))) { 2523 int err = 0; 2524 2525 if (!num_recomp_pages) 2526 break; 2527 2528 
zram_slot_lock(zram, pps->index); 2529 if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT)) 2530 goto next; 2531 2532 err = recompress_slot(zram, pps->index, page, 2533 &num_recomp_pages, threshold, 2534 prio, prio_max); 2535 next: 2536 zram_slot_unlock(zram, pps->index); 2537 release_pp_slot(zram, pps); 2538 2539 if (err) { 2540 ret = err; 2541 break; 2542 } 2543 2544 cond_resched(); 2545 } 2546 2547 release_init_lock: 2548 if (page) 2549 __free_page(page); 2550 release_pp_ctl(zram, ctl); 2551 atomic_set(&zram->pp_in_progress, 0); 2552 up_read(&zram->init_lock); 2553 return ret; 2554 } 2555 #endif 2556 2557 static void zram_bio_discard(struct zram *zram, struct bio *bio) 2558 { 2559 size_t n = bio->bi_iter.bi_size; 2560 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2561 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2562 SECTOR_SHIFT; 2563 2564 /* 2565 * zram manages data in physical block size units. Because logical block 2566 * size isn't identical with physical block size on some arch, we 2567 * could get a discard request pointing to a specific offset within a 2568 * certain physical block. Although we can handle this request by 2569 * reading that physiclal block and decompressing and partially zeroing 2570 * and re-compressing and then re-storing it, this isn't reasonable 2571 * because our intent with a discard request is to save memory. So 2572 * skipping this logical block is appropriate here. 
2573 */ 2574 if (offset) { 2575 if (n <= (PAGE_SIZE - offset)) 2576 return; 2577 2578 n -= (PAGE_SIZE - offset); 2579 index++; 2580 } 2581 2582 while (n >= PAGE_SIZE) { 2583 zram_slot_lock(zram, index); 2584 zram_free_page(zram, index); 2585 zram_slot_unlock(zram, index); 2586 atomic64_inc(&zram->stats.notify_free); 2587 index++; 2588 n -= PAGE_SIZE; 2589 } 2590 2591 bio_endio(bio); 2592 } 2593 2594 static void zram_bio_read(struct zram *zram, struct bio *bio) 2595 { 2596 unsigned long start_time = bio_start_io_acct(bio); 2597 struct bvec_iter iter = bio->bi_iter; 2598 2599 do { 2600 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2601 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2602 SECTOR_SHIFT; 2603 struct bio_vec bv = bio_iter_iovec(bio, iter); 2604 2605 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2606 2607 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { 2608 atomic64_inc(&zram->stats.failed_reads); 2609 bio->bi_status = BLK_STS_IOERR; 2610 break; 2611 } 2612 flush_dcache_page(bv.bv_page); 2613 2614 zram_slot_lock(zram, index); 2615 zram_accessed(zram, index); 2616 zram_slot_unlock(zram, index); 2617 2618 bio_advance_iter_single(bio, &iter, bv.bv_len); 2619 } while (iter.bi_size); 2620 2621 bio_end_io_acct(bio, start_time); 2622 bio_endio(bio); 2623 } 2624 2625 static void zram_bio_write(struct zram *zram, struct bio *bio) 2626 { 2627 unsigned long start_time = bio_start_io_acct(bio); 2628 struct bvec_iter iter = bio->bi_iter; 2629 2630 do { 2631 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2632 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2633 SECTOR_SHIFT; 2634 struct bio_vec bv = bio_iter_iovec(bio, iter); 2635 2636 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2637 2638 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { 2639 atomic64_inc(&zram->stats.failed_writes); 2640 bio->bi_status = BLK_STS_IOERR; 2641 break; 2642 } 2643 2644 zram_slot_lock(zram, index); 2645 
zram_accessed(zram, index); 2646 zram_slot_unlock(zram, index); 2647 2648 bio_advance_iter_single(bio, &iter, bv.bv_len); 2649 } while (iter.bi_size); 2650 2651 bio_end_io_acct(bio, start_time); 2652 bio_endio(bio); 2653 } 2654 2655 /* 2656 * Handler function for all zram I/O requests. 2657 */ 2658 static void zram_submit_bio(struct bio *bio) 2659 { 2660 struct zram *zram = bio->bi_bdev->bd_disk->private_data; 2661 2662 switch (bio_op(bio)) { 2663 case REQ_OP_READ: 2664 zram_bio_read(zram, bio); 2665 break; 2666 case REQ_OP_WRITE: 2667 zram_bio_write(zram, bio); 2668 break; 2669 case REQ_OP_DISCARD: 2670 case REQ_OP_WRITE_ZEROES: 2671 zram_bio_discard(zram, bio); 2672 break; 2673 default: 2674 WARN_ON_ONCE(1); 2675 bio_endio(bio); 2676 } 2677 } 2678 2679 static void zram_slot_free_notify(struct block_device *bdev, 2680 unsigned long index) 2681 { 2682 struct zram *zram; 2683 2684 zram = bdev->bd_disk->private_data; 2685 2686 atomic64_inc(&zram->stats.notify_free); 2687 if (!zram_slot_trylock(zram, index)) { 2688 atomic64_inc(&zram->stats.miss_free); 2689 return; 2690 } 2691 2692 zram_free_page(zram, index); 2693 zram_slot_unlock(zram, index); 2694 } 2695 2696 static void zram_comp_params_reset(struct zram *zram) 2697 { 2698 u32 prio; 2699 2700 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2701 comp_params_reset(zram, prio); 2702 } 2703 } 2704 2705 static void zram_destroy_comps(struct zram *zram) 2706 { 2707 u32 prio; 2708 2709 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2710 struct zcomp *comp = zram->comps[prio]; 2711 2712 zram->comps[prio] = NULL; 2713 if (!comp) 2714 continue; 2715 zcomp_destroy(comp); 2716 zram->num_active_comps--; 2717 } 2718 2719 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2720 /* Do not free statically defined compression algorithms */ 2721 if (zram->comp_algs[prio] != default_compressor) 2722 kfree(zram->comp_algs[prio]); 2723 zram->comp_algs[prio] = NULL; 2724 } 2725 2726 
zram_comp_params_reset(zram); 2727 } 2728 2729 static void zram_reset_device(struct zram *zram) 2730 { 2731 down_write(&zram->init_lock); 2732 2733 zram->limit_pages = 0; 2734 2735 set_capacity_and_notify(zram->disk, 0); 2736 part_stat_set_all(zram->disk->part0, 0); 2737 2738 /* I/O operation under all of CPU are done so let's free */ 2739 zram_meta_free(zram, zram->disksize); 2740 zram->disksize = 0; 2741 zram_destroy_comps(zram); 2742 memset(&zram->stats, 0, sizeof(zram->stats)); 2743 atomic_set(&zram->pp_in_progress, 0); 2744 reset_bdev(zram); 2745 2746 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 2747 up_write(&zram->init_lock); 2748 } 2749 2750 static ssize_t disksize_store(struct device *dev, 2751 struct device_attribute *attr, const char *buf, size_t len) 2752 { 2753 u64 disksize; 2754 struct zcomp *comp; 2755 struct zram *zram = dev_to_zram(dev); 2756 int err; 2757 u32 prio; 2758 2759 disksize = memparse(buf, NULL); 2760 if (!disksize) 2761 return -EINVAL; 2762 2763 down_write(&zram->init_lock); 2764 if (init_done(zram)) { 2765 pr_info("Cannot change disksize for initialized device\n"); 2766 err = -EBUSY; 2767 goto out_unlock; 2768 } 2769 2770 disksize = PAGE_ALIGN(disksize); 2771 if (!zram_meta_alloc(zram, disksize)) { 2772 err = -ENOMEM; 2773 goto out_unlock; 2774 } 2775 2776 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2777 if (!zram->comp_algs[prio]) 2778 continue; 2779 2780 comp = zcomp_create(zram->comp_algs[prio], 2781 &zram->params[prio]); 2782 if (IS_ERR(comp)) { 2783 pr_err("Cannot initialise %s compressing backend\n", 2784 zram->comp_algs[prio]); 2785 err = PTR_ERR(comp); 2786 goto out_free_comps; 2787 } 2788 2789 zram->comps[prio] = comp; 2790 zram->num_active_comps++; 2791 } 2792 zram->disksize = disksize; 2793 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); 2794 up_write(&zram->init_lock); 2795 2796 return len; 2797 2798 out_free_comps: 2799 zram_destroy_comps(zram); 2800 
zram_meta_free(zram, disksize); 2801 out_unlock: 2802 up_write(&zram->init_lock); 2803 return err; 2804 } 2805 2806 static ssize_t reset_store(struct device *dev, 2807 struct device_attribute *attr, const char *buf, size_t len) 2808 { 2809 int ret; 2810 unsigned short do_reset; 2811 struct zram *zram; 2812 struct gendisk *disk; 2813 2814 ret = kstrtou16(buf, 10, &do_reset); 2815 if (ret) 2816 return ret; 2817 2818 if (!do_reset) 2819 return -EINVAL; 2820 2821 zram = dev_to_zram(dev); 2822 disk = zram->disk; 2823 2824 mutex_lock(&disk->open_mutex); 2825 /* Do not reset an active device or claimed device */ 2826 if (disk_openers(disk) || zram->claim) { 2827 mutex_unlock(&disk->open_mutex); 2828 return -EBUSY; 2829 } 2830 2831 /* From now on, anyone can't open /dev/zram[0-9] */ 2832 zram->claim = true; 2833 mutex_unlock(&disk->open_mutex); 2834 2835 /* Make sure all the pending I/O are finished */ 2836 sync_blockdev(disk->part0); 2837 zram_reset_device(zram); 2838 2839 mutex_lock(&disk->open_mutex); 2840 zram->claim = false; 2841 mutex_unlock(&disk->open_mutex); 2842 2843 return len; 2844 } 2845 2846 static int zram_open(struct gendisk *disk, blk_mode_t mode) 2847 { 2848 struct zram *zram = disk->private_data; 2849 2850 WARN_ON(!mutex_is_locked(&disk->open_mutex)); 2851 2852 /* zram was claimed to reset so open request fails */ 2853 if (zram->claim) 2854 return -EBUSY; 2855 return 0; 2856 } 2857 2858 static const struct block_device_operations zram_devops = { 2859 .open = zram_open, 2860 .submit_bio = zram_submit_bio, 2861 .swap_slot_free_notify = zram_slot_free_notify, 2862 .owner = THIS_MODULE 2863 }; 2864 2865 static DEVICE_ATTR_WO(compact); 2866 static DEVICE_ATTR_RW(disksize); 2867 static DEVICE_ATTR_RO(initstate); 2868 static DEVICE_ATTR_WO(reset); 2869 static DEVICE_ATTR_WO(mem_limit); 2870 static DEVICE_ATTR_WO(mem_used_max); 2871 static DEVICE_ATTR_WO(idle); 2872 static DEVICE_ATTR_RW(comp_algorithm); 2873 #ifdef CONFIG_ZRAM_WRITEBACK 2874 static 
DEVICE_ATTR_RW(backing_dev); 2875 static DEVICE_ATTR_WO(writeback); 2876 static DEVICE_ATTR_RW(writeback_limit); 2877 static DEVICE_ATTR_RW(writeback_limit_enable); 2878 static DEVICE_ATTR_RW(writeback_batch_size); 2879 #endif 2880 #ifdef CONFIG_ZRAM_MULTI_COMP 2881 static DEVICE_ATTR_RW(recomp_algorithm); 2882 static DEVICE_ATTR_WO(recompress); 2883 #endif 2884 static DEVICE_ATTR_WO(algorithm_params); 2885 2886 static struct attribute *zram_disk_attrs[] = { 2887 &dev_attr_disksize.attr, 2888 &dev_attr_initstate.attr, 2889 &dev_attr_reset.attr, 2890 &dev_attr_compact.attr, 2891 &dev_attr_mem_limit.attr, 2892 &dev_attr_mem_used_max.attr, 2893 &dev_attr_idle.attr, 2894 &dev_attr_comp_algorithm.attr, 2895 #ifdef CONFIG_ZRAM_WRITEBACK 2896 &dev_attr_backing_dev.attr, 2897 &dev_attr_writeback.attr, 2898 &dev_attr_writeback_limit.attr, 2899 &dev_attr_writeback_limit_enable.attr, 2900 &dev_attr_writeback_batch_size.attr, 2901 #endif 2902 &dev_attr_io_stat.attr, 2903 &dev_attr_mm_stat.attr, 2904 #ifdef CONFIG_ZRAM_WRITEBACK 2905 &dev_attr_bd_stat.attr, 2906 #endif 2907 &dev_attr_debug_stat.attr, 2908 #ifdef CONFIG_ZRAM_MULTI_COMP 2909 &dev_attr_recomp_algorithm.attr, 2910 &dev_attr_recompress.attr, 2911 #endif 2912 &dev_attr_algorithm_params.attr, 2913 NULL, 2914 }; 2915 2916 ATTRIBUTE_GROUPS(zram_disk); 2917 2918 /* 2919 * Allocate and initialize new zram device. the function returns 2920 * '>= 0' device_id upon success, and negative value otherwise. 2921 */ 2922 static int zram_add(void) 2923 { 2924 struct queue_limits lim = { 2925 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, 2926 /* 2927 * To ensure that we always get PAGE_SIZE aligned and 2928 * n*PAGE_SIZED sized I/O requests. 2929 */ 2930 .physical_block_size = PAGE_SIZE, 2931 .io_min = PAGE_SIZE, 2932 .io_opt = PAGE_SIZE, 2933 .max_hw_discard_sectors = UINT_MAX, 2934 /* 2935 * zram_bio_discard() will clear all logical blocks if logical 2936 * block size is identical with physical block size(PAGE_SIZE). 
2937 * But if it is different, we will skip discarding some parts of 2938 * logical blocks in the part of the request range which isn't 2939 * aligned to physical block size. So we can't ensure that all 2940 * discarded logical blocks are zeroed. 2941 */ 2942 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE 2943 .max_write_zeroes_sectors = UINT_MAX, 2944 #endif 2945 .features = BLK_FEAT_STABLE_WRITES | 2946 BLK_FEAT_SYNCHRONOUS, 2947 }; 2948 struct zram *zram; 2949 int ret, device_id; 2950 2951 zram = kzalloc(sizeof(struct zram), GFP_KERNEL); 2952 if (!zram) 2953 return -ENOMEM; 2954 2955 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 2956 if (ret < 0) 2957 goto out_free_dev; 2958 device_id = ret; 2959 2960 init_rwsem(&zram->init_lock); 2961 #ifdef CONFIG_ZRAM_WRITEBACK 2962 zram->wb_batch_size = 32; 2963 #endif 2964 2965 /* gendisk structure */ 2966 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); 2967 if (IS_ERR(zram->disk)) { 2968 pr_err("Error allocating disk structure for device %d\n", 2969 device_id); 2970 ret = PTR_ERR(zram->disk); 2971 goto out_free_idr; 2972 } 2973 2974 zram->disk->major = zram_major; 2975 zram->disk->first_minor = device_id; 2976 zram->disk->minors = 1; 2977 zram->disk->flags |= GENHD_FL_NO_PART; 2978 zram->disk->fops = &zram_devops; 2979 zram->disk->private_data = zram; 2980 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 2981 atomic_set(&zram->pp_in_progress, 0); 2982 zram_comp_params_reset(zram); 2983 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 2984 2985 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */ 2986 set_capacity(zram->disk, 0); 2987 ret = device_add_disk(NULL, zram->disk, zram_disk_groups); 2988 if (ret) 2989 goto out_cleanup_disk; 2990 2991 zram_debugfs_register(zram); 2992 pr_info("Added device: %s\n", zram->disk->disk_name); 2993 return device_id; 2994 2995 out_cleanup_disk: 2996 put_disk(zram->disk); 2997 out_free_idr: 2998 idr_remove(&zram_index_idr, device_id); 2999 
out_free_dev: 3000 kfree(zram); 3001 return ret; 3002 } 3003 3004 static int zram_remove(struct zram *zram) 3005 { 3006 bool claimed; 3007 3008 mutex_lock(&zram->disk->open_mutex); 3009 if (disk_openers(zram->disk)) { 3010 mutex_unlock(&zram->disk->open_mutex); 3011 return -EBUSY; 3012 } 3013 3014 claimed = zram->claim; 3015 if (!claimed) 3016 zram->claim = true; 3017 mutex_unlock(&zram->disk->open_mutex); 3018 3019 zram_debugfs_unregister(zram); 3020 3021 if (claimed) { 3022 /* 3023 * If we were claimed by reset_store(), del_gendisk() will 3024 * wait until reset_store() is done, so nothing need to do. 3025 */ 3026 ; 3027 } else { 3028 /* Make sure all the pending I/O are finished */ 3029 sync_blockdev(zram->disk->part0); 3030 zram_reset_device(zram); 3031 } 3032 3033 pr_info("Removed device: %s\n", zram->disk->disk_name); 3034 3035 del_gendisk(zram->disk); 3036 3037 /* del_gendisk drains pending reset_store */ 3038 WARN_ON_ONCE(claimed && zram->claim); 3039 3040 /* 3041 * disksize_store() may be called in between zram_reset_device() 3042 * and del_gendisk(), so run the last reset to avoid leaking 3043 * anything allocated with disksize_store() 3044 */ 3045 zram_reset_device(zram); 3046 3047 put_disk(zram->disk); 3048 kfree(zram); 3049 return 0; 3050 } 3051 3052 /* zram-control sysfs attributes */ 3053 3054 /* 3055 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a 3056 * sense that reading from this file does alter the state of your system -- it 3057 * creates a new un-initialized zram device and returns back this device's 3058 * device_id (or an error code if it fails to create a new device). 
3059 */ 3060 static ssize_t hot_add_show(const struct class *class, 3061 const struct class_attribute *attr, 3062 char *buf) 3063 { 3064 int ret; 3065 3066 mutex_lock(&zram_index_mutex); 3067 ret = zram_add(); 3068 mutex_unlock(&zram_index_mutex); 3069 3070 if (ret < 0) 3071 return ret; 3072 return sysfs_emit(buf, "%d\n", ret); 3073 } 3074 /* This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used */ 3075 static struct class_attribute class_attr_hot_add = 3076 __ATTR(hot_add, 0400, hot_add_show, NULL); 3077 3078 static ssize_t hot_remove_store(const struct class *class, 3079 const struct class_attribute *attr, 3080 const char *buf, 3081 size_t count) 3082 { 3083 struct zram *zram; 3084 int ret, dev_id; 3085 3086 /* dev_id is gendisk->first_minor, which is `int' */ 3087 ret = kstrtoint(buf, 10, &dev_id); 3088 if (ret) 3089 return ret; 3090 if (dev_id < 0) 3091 return -EINVAL; 3092 3093 mutex_lock(&zram_index_mutex); 3094 3095 zram = idr_find(&zram_index_idr, dev_id); 3096 if (zram) { 3097 ret = zram_remove(zram); 3098 if (!ret) 3099 idr_remove(&zram_index_idr, dev_id); 3100 } else { 3101 ret = -ENODEV; 3102 } 3103 3104 mutex_unlock(&zram_index_mutex); 3105 return ret ? 
ret : count; 3106 } 3107 static CLASS_ATTR_WO(hot_remove); 3108 3109 static struct attribute *zram_control_class_attrs[] = { 3110 &class_attr_hot_add.attr, 3111 &class_attr_hot_remove.attr, 3112 NULL, 3113 }; 3114 ATTRIBUTE_GROUPS(zram_control_class); 3115 3116 static struct class zram_control_class = { 3117 .name = "zram-control", 3118 .class_groups = zram_control_class_groups, 3119 }; 3120 3121 static int zram_remove_cb(int id, void *ptr, void *data) 3122 { 3123 WARN_ON_ONCE(zram_remove(ptr)); 3124 return 0; 3125 } 3126 3127 static void destroy_devices(void) 3128 { 3129 class_unregister(&zram_control_class); 3130 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 3131 zram_debugfs_destroy(); 3132 idr_destroy(&zram_index_idr); 3133 unregister_blkdev(zram_major, "zram"); 3134 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3135 } 3136 3137 static int __init zram_init(void) 3138 { 3139 struct zram_table_entry zram_te; 3140 int ret; 3141 3142 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8); 3143 3144 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 3145 zcomp_cpu_up_prepare, zcomp_cpu_dead); 3146 if (ret < 0) 3147 return ret; 3148 3149 ret = class_register(&zram_control_class); 3150 if (ret) { 3151 pr_err("Unable to register zram-control class\n"); 3152 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3153 return ret; 3154 } 3155 3156 zram_debugfs_create(); 3157 zram_major = register_blkdev(0, "zram"); 3158 if (zram_major <= 0) { 3159 pr_err("Unable to get major number\n"); 3160 class_unregister(&zram_control_class); 3161 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3162 return -EBUSY; 3163 } 3164 3165 while (num_devices != 0) { 3166 mutex_lock(&zram_index_mutex); 3167 ret = zram_add(); 3168 mutex_unlock(&zram_index_mutex); 3169 if (ret < 0) 3170 goto out_error; 3171 num_devices--; 3172 } 3173 3174 return 0; 3175 3176 out_error: 3177 destroy_devices(); 3178 return ret; 3179 } 3180 3181 static void __exit zram_exit(void) 3182 { 
3183 destroy_devices(); 3184 } 3185 3186 module_init(zram_init); 3187 module_exit(zram_exit); 3188 3189 module_param(num_devices, uint, 0); 3190 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 3191 3192 MODULE_LICENSE("Dual BSD/GPL"); 3193 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 3194 MODULE_DESCRIPTION("Compressed RAM Block Device"); 3195