/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define pr_fmt(fmt) "zram: " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>
#include <linux/kernel_read_file.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

#define ZRAM_MAX_ALGO_NAME_SZ 128

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;

static void slot_free(struct zram *zram, u32 index);
/* Per-slot lockdep map, used only for lock validation/annotation */
#define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)

/* Register the per-slot bit-lock with lockdep (single class for all slots) */
static void slot_lock_init(struct zram *zram, u32 index)
{
	static struct lock_class_key __key;

	lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
			 &__key, 0);
}

/*
 * entry locking rules:
 *
 * 1) Lock is exclusive
 *
 * 2) lock() function can sleep waiting for the lock
 *
 * 3) Lock owner can sleep
 *
 * 4) Use TRY lock variant when in atomic context
 *    - must check return value and handle locking failures
 */
static __must_check bool slot_trylock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	/* Non-sleeping acquisition; safe in atomic context */
	if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
		mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
		lock_acquired(slot_dep_map(zram, index), _RET_IP_);
		return true;
	}

	return false;
}

/* Sleeping acquisition of the per-slot bit-lock (see rules above) */
static void slot_lock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
	wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
	lock_acquired(slot_dep_map(zram, index), _RET_IP_);
}

static void slot_unlock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	mutex_release(slot_dep_map(zram, index), _RET_IP_);
	clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
}

/* Device is considered initialized once a disksize has been set */
static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

/* Slot handle: zsmalloc handle, or bdev block index when ZRAM_WB is set */
static unsigned long get_slot_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void
set_slot_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

static bool test_slot_flag(struct zram *zram, u32 index,
			   enum zram_pageflags flag)
{
	return zram->table[index].attr.flags & BIT(flag);
}

static void set_slot_flag(struct zram *zram, u32 index,
			  enum zram_pageflags flag)
{
	zram->table[index].attr.flags |= BIT(flag);
}

static void clear_slot_flag(struct zram *zram, u32 index,
			    enum zram_pageflags flag)
{
	zram->table[index].attr.flags &= ~BIT(flag);
}

/* Compressed object size lives in the low ZRAM_FLAG_SHIFT bits of flags */
static size_t get_slot_size(struct zram *zram, u32 index)
{
	return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void set_slot_size(struct zram *zram, u32 index, size_t size)
{
	/* Preserve the flag bits, replace only the size field */
	unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size;
}

/*
 * A slot holds data if it has a non-zero compressed size, is a same-filled
 * page (no backing object), or has been written back to the backing device.
 */
static inline bool slot_allocated(struct zram *zram, u32 index)
{
	return get_slot_size(zram, index) ||
		test_slot_flag(zram, index, ZRAM_SAME) ||
		test_slot_flag(zram, index, ZRAM_WB);
}

static inline void set_slot_comp_priority(struct zram *zram, u32 index,
					  u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Clear previous priority value first, in case if we recompress
	 * further an already recompressed page
	 */
	zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
					   ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}

static inline u32 get_slot_comp_priority(struct zram *zram, u32 index)
{
	u32 prio = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1;

	return prio & ZRAM_COMP_PRIORITY_MASK;
}

/*
 * Any access clears IDLE and PP_SLOT so that in-flight post-processing
 * (writeback/recompression) skips this slot, and refreshes the access time.
 */
static void mark_slot_accessed(struct zram *zram, u32 index)
{
	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds();
#endif
}

/* Lock-free monotonic update of the max_used_pages watermark */
static inline void update_used_max(struct zram *zram, const unsigned long pages)
{
	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		if (cur_max >= pages)
			return;
	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
					  &cur_max, pages));
}

/* Returns false when the configured mem_limit has been exceeded */
static bool zram_can_store_page(struct zram *zram)
{
	unsigned long alloced_pages;

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	return !zram->limit_pages || alloced_pages <= zram->limit_pages;
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#define ZRAM_PARTIAL_IO 1
#else
/* With 4K pages every bvec covers a whole page, never a partial one */
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
/* One candidate slot queued for post-processing (writeback/recompress) */
struct zram_pp_slot {
	unsigned long index;
	struct list_head entry;
};

/*
 * A post-processing bucket is, essentially, a size class, this defines
 * the range (in bytes) of pp-slots sizes in particular bucket.
 */
#define PP_BUCKET_SIZE_RANGE	64
#define NUM_PP_BUCKETS		((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)

struct zram_pp_ctl {
	struct list_head pp_buckets[NUM_PP_BUCKETS];
};

static struct zram_pp_ctl *init_pp_ctl(void)
{
	struct zram_pp_ctl *ctl;
	u32 idx;

	ctl = kmalloc_obj(*ctl);
	if (!ctl)
		return NULL;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
		INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
	return ctl;
}

/* Drop the slot's PP_SLOT claim and free the pp descriptor */
static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
{
	list_del_init(&pps->entry);

	slot_lock(zram, pps->index);
	clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
	slot_unlock(zram, pps->index);

	kfree(pps);
}

static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
{
	u32 idx;

	if (!ctl)
		return;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
		while (!list_empty(&ctl->pp_buckets[idx])) {
			struct zram_pp_slot *pps;

			pps = list_first_entry(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
			release_pp_slot(zram, pps);
		}
	}

	kfree(ctl);
}

/*
 * Queue @index for post-processing, bucketed by its compressed size.
 * Returns false on allocation failure (caller should stop scanning).
 */
static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
			  u32 index)
{
	struct zram_pp_slot *pps;
	u32 bid;

	pps = kmalloc_obj(*pps, GFP_NOIO | __GFP_NOWARN);
	if (!pps)
		return false;

	INIT_LIST_HEAD(&pps->entry);
	pps->index = index;

	bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
	list_add(&pps->entry, &ctl->pp_buckets[bid]);

	set_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
	return true;
}

static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
{
	struct zram_pp_slot *pps = NULL;
	s32 idx = NUM_PP_BUCKETS - 1;

	/* The higher the bucket id the more optimal slot post-processing is */
	while (idx >= 0) {
		pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
		if (pps)
			break;

		idx--;
	}
	return pps;
}
#endif

/* Fill a buffer with a repeating word value (for ZRAM_SAME page reads) */
static inline void zram_fill_page(void *ptr, unsigned long len,
				  unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

/*
 * Detect pages whose every machine word holds the same value; such pages
 * are stored as just that word (ZRAM_SAME) instead of being compressed.
 */
static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	/* Check the last word first: cheap rejection for most pages */
	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = init_done(zram);

	return sysfs_emit(buf, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sysfs_emit(buf, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
			       struct device_attribute *attr, const char *buf,
			       size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;

	return len;
}

/* Writing "0" resets the max_used_pages watermark to current usage */
static ssize_t mem_used_max_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return
-EINVAL; 411 412 guard(rwsem_read)(&zram->dev_lock); 413 if (init_done(zram)) { 414 atomic_long_set(&zram->stats.max_used_pages, 415 zs_get_total_pages(zram->mem_pool)); 416 } 417 418 return len; 419 } 420 421 /* 422 * Mark all pages which are older than or equal to cutoff as IDLE. 423 * Callers should hold the zram init lock in read mode 424 */ 425 static void mark_idle(struct zram *zram, ktime_t cutoff) 426 { 427 int is_idle = 1; 428 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 429 int index; 430 431 for (index = 0; index < nr_pages; index++) { 432 /* 433 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no 434 * post-processing (recompress, writeback) happens to the 435 * ZRAM_SAME slot. 436 * 437 * And ZRAM_WB slots simply cannot be ZRAM_IDLE. 438 */ 439 slot_lock(zram, index); 440 if (!slot_allocated(zram, index) || 441 test_slot_flag(zram, index, ZRAM_WB) || 442 test_slot_flag(zram, index, ZRAM_SAME)) { 443 slot_unlock(zram, index); 444 continue; 445 } 446 447 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 448 is_idle = !cutoff || 449 ktime_after(cutoff, zram->table[index].attr.ac_time); 450 #endif 451 if (is_idle) 452 set_slot_flag(zram, index, ZRAM_IDLE); 453 else 454 clear_slot_flag(zram, index, ZRAM_IDLE); 455 slot_unlock(zram, index); 456 } 457 } 458 459 static ssize_t idle_store(struct device *dev, struct device_attribute *attr, 460 const char *buf, size_t len) 461 { 462 struct zram *zram = dev_to_zram(dev); 463 ktime_t cutoff = 0; 464 465 if (!sysfs_streq(buf, "all")) { 466 /* 467 * If it did not parse as 'all' try to treat it as an integer 468 * when we have memory tracking enabled. 
469 */ 470 u32 age_sec; 471 472 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && 473 !kstrtouint(buf, 0, &age_sec)) 474 cutoff = ktime_sub((u32)ktime_get_boottime_seconds(), 475 age_sec); 476 else 477 return -EINVAL; 478 } 479 480 guard(rwsem_read)(&zram->dev_lock); 481 if (!init_done(zram)) 482 return -EINVAL; 483 484 /* 485 * A cutoff of 0 marks everything as idle, this is the 486 * "all" behavior. 487 */ 488 mark_idle(zram, cutoff); 489 return len; 490 } 491 492 #ifdef CONFIG_ZRAM_WRITEBACK 493 #define INVALID_BDEV_BLOCK (~0UL) 494 495 static int read_from_zspool_raw(struct zram *zram, struct page *page, 496 u32 index); 497 static int read_from_zspool(struct zram *zram, struct page *page, u32 index); 498 499 struct zram_wb_ctl { 500 /* idle list is accessed only by the writeback task, no concurency */ 501 struct list_head idle_reqs; 502 /* done list is accessed concurrently, protect by done_lock */ 503 struct list_head done_reqs; 504 wait_queue_head_t done_wait; 505 spinlock_t done_lock; 506 atomic_t num_inflight; 507 }; 508 509 struct zram_wb_req { 510 unsigned long blk_idx; 511 struct page *page; 512 struct zram_pp_slot *pps; 513 struct bio_vec bio_vec; 514 struct bio bio; 515 516 struct list_head entry; 517 }; 518 519 struct zram_rb_req { 520 struct work_struct work; 521 struct zram *zram; 522 struct page *page; 523 /* The read bio for backing device */ 524 struct bio *bio; 525 unsigned long blk_idx; 526 union { 527 /* The original bio to complete (async read) */ 528 struct bio *parent; 529 /* error status (sync read) */ 530 int error; 531 }; 532 u32 index; 533 }; 534 535 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 536 static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr, 537 char *buf) 538 { 539 struct zram *zram = dev_to_zram(dev); 540 ssize_t ret; 541 542 guard(rwsem_read)(&zram->dev_lock); 543 ret = sysfs_emit(buf, 544 "%8llu %8llu %8llu\n", 545 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 546 
			 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));

	return ret;
}

/* Toggle compressed writeback; only allowed before device initialization */
static ssize_t compressed_writeback_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	bool val;

	if (kstrtobool(buf, &val))
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		return -EBUSY;
	}

	zram->compressed_wb = val;

	return len;
}

static ssize_t compressed_writeback_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = zram->compressed_wb;

	return sysfs_emit(buf, "%d\n", val);
}

static ssize_t writeback_limit_enable_store(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;

	if (kstrtoull(buf, 10, &val))
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	zram->wb_limit_enable = val;

	return len;
}

static ssize_t writeback_limit_enable_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = zram->wb_limit_enable;

	return sysfs_emit(buf, "%d\n", val);
}

/* Set the writeback budget, counted in 4K blocks (see FOUR_K) */
static ssize_t writeback_limit_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;

	if (kstrtoull(buf, 10, &val))
		return -EINVAL;

	/*
	 * When the page size is greater than 4KB, if bd_wb_limit is set to
	 * a value that is not page-size aligned, it will cause value
	 * wrapping. For example, when the page size is set to 16KB and
	 * bd_wb_limit is set to 3, a single write-back operation will
	 * cause bd_wb_limit to become -1. Even more terrifying is that
	 * bd_wb_limit is an unsigned number.
	 */
	val = rounddown(val, PAGE_SIZE / 4096);

	guard(rwsem_write)(&zram->dev_lock);
	zram->bd_wb_limit = val;

	return len;
}

static ssize_t writeback_limit_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = zram->bd_wb_limit;

	return sysfs_emit(buf, "%llu\n", val);
}

/* Number of writeback requests kept in flight concurrently (must be > 0) */
static ssize_t writeback_batch_size_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u32 val;

	if (kstrtouint(buf, 10, &val))
		return -EINVAL;

	if (!val)
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	zram->wb_batch_size = val;

	return len;
}

static ssize_t writeback_batch_size_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = zram->wb_batch_size;

	return sysfs_emit(buf, "%u\n", val);
}

/* Detach the backing device and free the block bitmap */
static void reset_bdev(struct zram *zram)
{
	if (!zram->backing_dev)
		return;

	/* hope filp_close flush all of IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		return 5;
	}

	/* file_path() writes at the end of buf; move the path to the front */
	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p))
		return PTR_ERR(p);

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
	return ret;
}

/*
 * Attach a block device (by path) as writeback backing storage.
 * Only allowed before the zram device is initialized.
 */
static ssize_t backing_dev_store(struct device *dev,
				 struct device_attribute *attr, const char *buf,
				 size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strscpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	inode = backing_dev->f_mapping->host;

	/* Support only block device in this moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	/* Refuse to use zero sized device (also prevents self reference) */
	if (!nr_pages) {
		err = -EINVAL;
		goto out;
	}

	/* One bit per backing-device block, tracking allocation */
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = I_BDEV(inode);
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	kfree(file_name);

	return err;
}

/*
 * Reserve one free block on the backing device. Returns the block index
 * or INVALID_BDEV_BLOCK when the device is full.
 */
static unsigned long zram_reserve_bdev_block(struct zram *zram)
{
	unsigned long blk_idx;

	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0);
	if (blk_idx == zram->nr_pages)
		return INVALID_BDEV_BLOCK;

	set_bit(blk_idx, zram->bitmap);
	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}

static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	/* Double-release would indicate a bookkeeping bug */
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}

static void release_wb_req(struct zram_wb_req *req)
{
	__free_page(req->page);
	kfree(req);
}

static void release_wb_ctl(struct zram_wb_ctl *wb_ctl)
{
	if (!wb_ctl)
		return;

	/* We should never have inflight requests at this point */
	WARN_ON(atomic_read(&wb_ctl->num_inflight));
	WARN_ON(!list_empty(&wb_ctl->done_reqs));

	while (!list_empty(&wb_ctl->idle_reqs)) {
		struct zram_wb_req *req;

		req = list_first_entry(&wb_ctl->idle_reqs,
				       struct zram_wb_req, entry);
		list_del(&req->entry);
		release_wb_req(req);
	}

	kfree(wb_ctl);
}

/*
 * Allocate the writeback control structure and up to wb_batch_size
 * pre-allocated requests. Partial allocation is tolerated; complete
 * failure returns NULL.
 */
static struct zram_wb_ctl *init_wb_ctl(struct zram *zram)
{
	struct zram_wb_ctl *wb_ctl;
	int i;

	wb_ctl = kmalloc_obj(*wb_ctl);
	if (!wb_ctl)
		return NULL;

	INIT_LIST_HEAD(&wb_ctl->idle_reqs);
	INIT_LIST_HEAD(&wb_ctl->done_reqs);
	atomic_set(&wb_ctl->num_inflight, 0);
	init_waitqueue_head(&wb_ctl->done_wait);
	spin_lock_init(&wb_ctl->done_lock);

	for (i = 0; i < zram->wb_batch_size; i++) {
		struct zram_wb_req *req;

		/*
		 * This is fatal condition only if we couldn't allocate
		 * any requests at all. Otherwise we just work with the
		 * requests that we have successfully allocated, so that
		 * writeback can still proceed, even if there is only one
		 * request on the idle list.
		 */
		req = kzalloc_obj(*req, GFP_KERNEL | __GFP_NOWARN);
		if (!req)
			break;

		req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN);
		if (!req->page) {
			kfree(req);
			break;
		}

		list_add(&req->entry, &wb_ctl->idle_reqs);
	}

	/* We couldn't allocate any requests, so writeback is not possible */
	if (list_empty(&wb_ctl->idle_reqs))
		goto release_wb_ctl;

	return wb_ctl;

release_wb_ctl:
	release_wb_ctl(wb_ctl);
	return NULL;
}

/* Refund one page worth of writeback budget after a failed submission */
static void zram_account_writeback_rollback(struct zram *zram)
{
	lockdep_assert_held_write(&zram->dev_lock);

	if (zram->wb_limit_enable)
		zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12);
}

/* Charge one page worth of writeback budget (in 4K units) */
static void zram_account_writeback_submit(struct zram *zram)
{
	lockdep_assert_held_write(&zram->dev_lock);

	if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
		zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
}

/*
 * Finish one completed writeback bio: on success, repoint the slot at the
 * backing-device block and free its zsmalloc object; on failure, release
 * the reserved block and refund the budget.
 */
static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
{
	u32 index = req->pps->index;
	int err;

	err = blk_status_to_errno(req->bio.bi_status);
	if (err) {
		/*
		 * Failed wb requests should not be accounted in wb_limit
		 * (if enabled).
		 */
		zram_account_writeback_rollback(zram);
		zram_release_bdev_block(zram, req->blk_idx);
		return err;
	}

	atomic64_inc(&zram->stats.bd_writes);
	slot_lock(zram, index);
	/*
	 * We release slot lock during writeback so slot can change under us:
	 * slot_free() or slot_free() and zram_write_page(). In both cases
	 * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can
	 * set ZRAM_PP_SLOT on such slots until current post-processing
	 * finishes.
	 */
	if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) {
		zram_release_bdev_block(zram, req->blk_idx);
		goto out;
	}

	clear_slot_flag(zram, index, ZRAM_IDLE);
	if (test_slot_flag(zram, index, ZRAM_HUGE))
		atomic64_dec(&zram->stats.huge_pages);
	atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size);
	zs_free(zram->mem_pool, get_slot_handle(zram, index));
	/* The handle now stores the backing-device block index */
	set_slot_handle(zram, index, req->blk_idx);
	set_slot_flag(zram, index, ZRAM_WB);

out:
	slot_unlock(zram, index);
	return 0;
}

/* bio completion: move req to the done list and wake the writeback task */
static void zram_writeback_endio(struct bio *bio)
{
	struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio);
	struct zram_wb_ctl *wb_ctl = bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&wb_ctl->done_lock, flags);
	list_add(&req->entry, &wb_ctl->done_reqs);
	spin_unlock_irqrestore(&wb_ctl->done_lock, flags);

	wake_up(&wb_ctl->done_wait);
}

static void zram_submit_wb_request(struct zram *zram,
				   struct zram_wb_ctl *wb_ctl,
				   struct zram_wb_req *req)
{
	/*
	 * wb_limit (if enabled) should be adjusted before submission,
	 * so that we don't over-submit.
981 */ 982 zram_account_writeback_submit(zram); 983 atomic_inc(&wb_ctl->num_inflight); 984 req->bio.bi_private = wb_ctl; 985 submit_bio(&req->bio); 986 } 987 988 static int zram_complete_done_reqs(struct zram *zram, 989 struct zram_wb_ctl *wb_ctl) 990 { 991 struct zram_wb_req *req; 992 unsigned long flags; 993 int ret = 0, err; 994 995 while (atomic_read(&wb_ctl->num_inflight) > 0) { 996 spin_lock_irqsave(&wb_ctl->done_lock, flags); 997 req = list_first_entry_or_null(&wb_ctl->done_reqs, 998 struct zram_wb_req, entry); 999 if (req) 1000 list_del(&req->entry); 1001 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 1002 1003 /* ->num_inflight > 0 doesn't mean we have done requests */ 1004 if (!req) 1005 break; 1006 1007 err = zram_writeback_complete(zram, req); 1008 if (err) 1009 ret = err; 1010 1011 atomic_dec(&wb_ctl->num_inflight); 1012 release_pp_slot(zram, req->pps); 1013 req->pps = NULL; 1014 1015 list_add(&req->entry, &wb_ctl->idle_reqs); 1016 } 1017 1018 return ret; 1019 } 1020 1021 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl) 1022 { 1023 struct zram_wb_req *req; 1024 1025 req = list_first_entry_or_null(&wb_ctl->idle_reqs, 1026 struct zram_wb_req, entry); 1027 if (req) 1028 list_del(&req->entry); 1029 return req; 1030 } 1031 1032 static int zram_writeback_slots(struct zram *zram, 1033 struct zram_pp_ctl *ctl, 1034 struct zram_wb_ctl *wb_ctl) 1035 { 1036 unsigned long blk_idx = INVALID_BDEV_BLOCK; 1037 struct zram_wb_req *req = NULL; 1038 struct zram_pp_slot *pps; 1039 int ret = 0, err = 0; 1040 u32 index = 0; 1041 1042 while ((pps = select_pp_slot(ctl))) { 1043 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 1044 ret = -EIO; 1045 break; 1046 } 1047 1048 while (!req) { 1049 req = zram_select_idle_req(wb_ctl); 1050 if (req) 1051 break; 1052 1053 wait_event(wb_ctl->done_wait, 1054 !list_empty(&wb_ctl->done_reqs)); 1055 1056 err = zram_complete_done_reqs(zram, wb_ctl); 1057 /* 1058 * BIO errors are not fatal, we continue and 
simply 1059 * attempt to writeback the remaining objects (pages). 1060 * At the same time we need to signal user-space that 1061 * some writes (at least one, but also could be all of 1062 * them) were not successful and we do so by returning 1063 * the most recent BIO error. 1064 */ 1065 if (err) 1066 ret = err; 1067 } 1068 1069 if (blk_idx == INVALID_BDEV_BLOCK) { 1070 blk_idx = zram_reserve_bdev_block(zram); 1071 if (blk_idx == INVALID_BDEV_BLOCK) { 1072 ret = -ENOSPC; 1073 break; 1074 } 1075 } 1076 1077 index = pps->index; 1078 slot_lock(zram, index); 1079 /* 1080 * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so 1081 * slots can change in the meantime. If slots are accessed or 1082 * freed they lose ZRAM_PP_SLOT flag and hence we don't 1083 * post-process them. 1084 */ 1085 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) 1086 goto next; 1087 if (zram->compressed_wb) 1088 err = read_from_zspool_raw(zram, req->page, index); 1089 else 1090 err = read_from_zspool(zram, req->page, index); 1091 if (err) 1092 goto next; 1093 slot_unlock(zram, index); 1094 1095 /* 1096 * From now on pp-slot is owned by the req, remove it from 1097 * its pp bucket. 1098 */ 1099 list_del_init(&pps->entry); 1100 1101 req->blk_idx = blk_idx; 1102 req->pps = pps; 1103 bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE); 1104 req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1105 req->bio.bi_end_io = zram_writeback_endio; 1106 __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0); 1107 1108 zram_submit_wb_request(zram, wb_ctl, req); 1109 blk_idx = INVALID_BDEV_BLOCK; 1110 req = NULL; 1111 cond_resched(); 1112 continue; 1113 1114 next: 1115 slot_unlock(zram, index); 1116 release_pp_slot(zram, pps); 1117 } 1118 1119 /* 1120 * Selected idle req, but never submitted it due to some error or 1121 * wb limit. 
1122 */ 1123 if (req) 1124 release_wb_req(req); 1125 1126 while (atomic_read(&wb_ctl->num_inflight) > 0) { 1127 wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs)); 1128 err = zram_complete_done_reqs(zram, wb_ctl); 1129 if (err) 1130 ret = err; 1131 } 1132 1133 return ret; 1134 } 1135 1136 #define PAGE_WRITEBACK 0 1137 #define HUGE_WRITEBACK (1 << 0) 1138 #define IDLE_WRITEBACK (1 << 1) 1139 #define INCOMPRESSIBLE_WRITEBACK (1 << 2) 1140 1141 static int parse_page_index(char *val, unsigned long nr_pages, 1142 unsigned long *lo, unsigned long *hi) 1143 { 1144 int ret; 1145 1146 ret = kstrtoul(val, 10, lo); 1147 if (ret) 1148 return ret; 1149 if (*lo >= nr_pages) 1150 return -ERANGE; 1151 *hi = *lo + 1; 1152 return 0; 1153 } 1154 1155 static int parse_page_indexes(char *val, unsigned long nr_pages, 1156 unsigned long *lo, unsigned long *hi) 1157 { 1158 char *delim; 1159 int ret; 1160 1161 delim = strchr(val, '-'); 1162 if (!delim) 1163 return -EINVAL; 1164 1165 *delim = 0x00; 1166 ret = kstrtoul(val, 10, lo); 1167 if (ret) 1168 return ret; 1169 if (*lo >= nr_pages) 1170 return -ERANGE; 1171 1172 ret = kstrtoul(delim + 1, 10, hi); 1173 if (ret) 1174 return ret; 1175 if (*hi >= nr_pages || *lo > *hi) 1176 return -ERANGE; 1177 *hi += 1; 1178 return 0; 1179 } 1180 1181 static int parse_mode(char *val, u32 *mode) 1182 { 1183 *mode = 0; 1184 1185 if (!strcmp(val, "idle")) 1186 *mode = IDLE_WRITEBACK; 1187 if (!strcmp(val, "huge")) 1188 *mode = HUGE_WRITEBACK; 1189 if (!strcmp(val, "huge_idle")) 1190 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK; 1191 if (!strcmp(val, "incompressible")) 1192 *mode = INCOMPRESSIBLE_WRITEBACK; 1193 1194 if (*mode == 0) 1195 return -EINVAL; 1196 return 0; 1197 } 1198 1199 static void scan_slots_for_writeback(struct zram *zram, u32 mode, 1200 unsigned long lo, unsigned long hi, 1201 struct zram_pp_ctl *ctl) 1202 { 1203 u32 index = lo; 1204 1205 while (index < hi) { 1206 bool ok = true; 1207 1208 slot_lock(zram, index); 1209 if 
(!slot_allocated(zram, index)) 1210 goto next; 1211 1212 if (test_slot_flag(zram, index, ZRAM_WB) || 1213 test_slot_flag(zram, index, ZRAM_SAME)) 1214 goto next; 1215 1216 if (mode & IDLE_WRITEBACK && 1217 !test_slot_flag(zram, index, ZRAM_IDLE)) 1218 goto next; 1219 if (mode & HUGE_WRITEBACK && 1220 !test_slot_flag(zram, index, ZRAM_HUGE)) 1221 goto next; 1222 if (mode & INCOMPRESSIBLE_WRITEBACK && 1223 !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 1224 goto next; 1225 1226 ok = place_pp_slot(zram, ctl, index); 1227 next: 1228 slot_unlock(zram, index); 1229 if (!ok) 1230 break; 1231 index++; 1232 } 1233 } 1234 1235 static ssize_t writeback_store(struct device *dev, 1236 struct device_attribute *attr, 1237 const char *buf, size_t len) 1238 { 1239 struct zram *zram = dev_to_zram(dev); 1240 u64 nr_pages = zram->disksize >> PAGE_SHIFT; 1241 unsigned long lo = 0, hi = nr_pages; 1242 struct zram_pp_ctl *pp_ctl = NULL; 1243 struct zram_wb_ctl *wb_ctl = NULL; 1244 char *args, *param, *val; 1245 ssize_t ret = len; 1246 int err, mode = 0; 1247 1248 guard(rwsem_write)(&zram->dev_lock); 1249 if (!init_done(zram)) 1250 return -EINVAL; 1251 1252 if (!zram->backing_dev) 1253 return -ENODEV; 1254 1255 pp_ctl = init_pp_ctl(); 1256 if (!pp_ctl) 1257 return -ENOMEM; 1258 1259 wb_ctl = init_wb_ctl(zram); 1260 if (!wb_ctl) { 1261 ret = -ENOMEM; 1262 goto out; 1263 } 1264 1265 args = skip_spaces(buf); 1266 while (*args) { 1267 args = next_arg(args, ¶m, &val); 1268 1269 /* 1270 * Workaround to support the old writeback interface. 1271 * 1272 * The old writeback interface has a minor inconsistency and 1273 * requires key=value only for page_index parameter, while the 1274 * writeback mode is a valueless parameter. 
1275 * 1276 * This is not the case anymore and now all parameters are 1277 * required to have values, however, we need to support the 1278 * legacy writeback interface format so we check if we can 1279 * recognize a valueless parameter as the (legacy) writeback 1280 * mode. 1281 */ 1282 if (!val || !*val) { 1283 err = parse_mode(param, &mode); 1284 if (err) { 1285 ret = err; 1286 goto out; 1287 } 1288 1289 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1290 break; 1291 } 1292 1293 if (!strcmp(param, "type")) { 1294 err = parse_mode(val, &mode); 1295 if (err) { 1296 ret = err; 1297 goto out; 1298 } 1299 1300 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1301 break; 1302 } 1303 1304 if (!strcmp(param, "page_index")) { 1305 err = parse_page_index(val, nr_pages, &lo, &hi); 1306 if (err) { 1307 ret = err; 1308 goto out; 1309 } 1310 1311 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1312 continue; 1313 } 1314 1315 if (!strcmp(param, "page_indexes")) { 1316 err = parse_page_indexes(val, nr_pages, &lo, &hi); 1317 if (err) { 1318 ret = err; 1319 goto out; 1320 } 1321 1322 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1323 continue; 1324 } 1325 } 1326 1327 err = zram_writeback_slots(zram, pp_ctl, wb_ctl); 1328 if (err) 1329 ret = err; 1330 1331 out: 1332 release_pp_ctl(zram, pp_ctl); 1333 release_wb_ctl(wb_ctl); 1334 1335 return ret; 1336 } 1337 1338 static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index) 1339 { 1340 struct zcomp_strm *zstrm; 1341 unsigned int size; 1342 int ret, prio; 1343 void *src; 1344 1345 slot_lock(zram, index); 1346 /* Since slot was unlocked we need to make sure it's still ZRAM_WB */ 1347 if (!test_slot_flag(zram, index, ZRAM_WB)) { 1348 slot_unlock(zram, index); 1349 /* We read some stale data, zero it out */ 1350 memset_page(page, 0, 0, PAGE_SIZE); 1351 return -EIO; 1352 } 1353 1354 if (test_slot_flag(zram, index, ZRAM_HUGE)) { 1355 slot_unlock(zram, index); 1356 return 0; 1357 } 1358 1359 
size = get_slot_size(zram, index); 1360 prio = get_slot_comp_priority(zram, index); 1361 1362 zstrm = zcomp_stream_get(zram->comps[prio]); 1363 src = kmap_local_page(page); 1364 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, 1365 zstrm->local_copy); 1366 if (!ret) 1367 copy_page(src, zstrm->local_copy); 1368 kunmap_local(src); 1369 zcomp_stream_put(zstrm); 1370 slot_unlock(zram, index); 1371 1372 return ret; 1373 } 1374 1375 static void zram_deferred_decompress(struct work_struct *w) 1376 { 1377 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1378 struct page *page = bio_first_page_all(req->bio); 1379 struct zram *zram = req->zram; 1380 u32 index = req->index; 1381 int ret; 1382 1383 ret = decompress_bdev_page(zram, page, index); 1384 if (ret) 1385 req->parent->bi_status = BLK_STS_IOERR; 1386 1387 /* Decrement parent's ->remaining */ 1388 bio_endio(req->parent); 1389 bio_put(req->bio); 1390 kfree(req); 1391 } 1392 1393 static void zram_async_read_endio(struct bio *bio) 1394 { 1395 struct zram_rb_req *req = bio->bi_private; 1396 struct zram *zram = req->zram; 1397 1398 if (bio->bi_status) { 1399 req->parent->bi_status = bio->bi_status; 1400 bio_endio(req->parent); 1401 bio_put(bio); 1402 kfree(req); 1403 return; 1404 } 1405 1406 /* 1407 * NOTE: zram_async_read_endio() is not exactly right place for this. 1408 * Ideally, we need to do it after ZRAM_WB check, but this requires 1409 * us to use wq path even on systems that don't enable compressed 1410 * writeback, because we cannot take slot-lock in the current context. 1411 * 1412 * Keep the existing behavior for now. 1413 */ 1414 if (zram->compressed_wb == false) { 1415 /* No decompression needed, complete the parent IO */ 1416 bio_endio(req->parent); 1417 bio_put(bio); 1418 kfree(req); 1419 return; 1420 } 1421 1422 /* 1423 * zram decompression is sleepable, so we need to deffer it to 1424 * a preemptible context. 
1425 */ 1426 INIT_WORK(&req->work, zram_deferred_decompress); 1427 queue_work(system_highpri_wq, &req->work); 1428 } 1429 1430 static int read_from_bdev_async(struct zram *zram, struct page *page, 1431 u32 index, unsigned long blk_idx, 1432 struct bio *parent) 1433 { 1434 struct zram_rb_req *req; 1435 struct bio *bio; 1436 1437 req = kmalloc_obj(*req, GFP_NOIO); 1438 if (!req) 1439 return -ENOMEM; 1440 1441 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); 1442 if (!bio) { 1443 kfree(req); 1444 return -ENOMEM; 1445 } 1446 1447 req->zram = zram; 1448 req->index = index; 1449 req->blk_idx = blk_idx; 1450 req->bio = bio; 1451 req->parent = parent; 1452 1453 bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 1454 bio->bi_private = req; 1455 bio->bi_end_io = zram_async_read_endio; 1456 1457 __bio_add_page(bio, page, PAGE_SIZE, 0); 1458 bio_inc_remaining(parent); 1459 submit_bio(bio); 1460 1461 return 0; 1462 } 1463 1464 static void zram_sync_read(struct work_struct *w) 1465 { 1466 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1467 struct bio_vec bv; 1468 struct bio bio; 1469 1470 bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ); 1471 bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1472 __bio_add_page(&bio, req->page, PAGE_SIZE, 0); 1473 req->error = submit_bio_wait(&bio); 1474 } 1475 1476 /* 1477 * Block layer want one ->submit_bio to be active at a time, so if we use 1478 * chained IO with parent IO in same context, it's a deadlock. To avoid that, 1479 * use a worker thread context. 
1480 */ 1481 static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index, 1482 unsigned long blk_idx) 1483 { 1484 struct zram_rb_req req; 1485 1486 req.page = page; 1487 req.zram = zram; 1488 req.blk_idx = blk_idx; 1489 1490 INIT_WORK_ONSTACK(&req.work, zram_sync_read); 1491 queue_work(system_dfl_wq, &req.work); 1492 flush_work(&req.work); 1493 destroy_work_on_stack(&req.work); 1494 1495 if (req.error || zram->compressed_wb == false) 1496 return req.error; 1497 1498 return decompress_bdev_page(zram, page, index); 1499 } 1500 1501 static int read_from_bdev(struct zram *zram, struct page *page, u32 index, 1502 unsigned long blk_idx, struct bio *parent) 1503 { 1504 atomic64_inc(&zram->stats.bd_reads); 1505 if (!parent) { 1506 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) 1507 return -EIO; 1508 return read_from_bdev_sync(zram, page, index, blk_idx); 1509 } 1510 return read_from_bdev_async(zram, page, index, blk_idx, parent); 1511 } 1512 #else 1513 static inline void reset_bdev(struct zram *zram) {}; 1514 static int read_from_bdev(struct zram *zram, struct page *page, u32 index, 1515 unsigned long blk_idx, struct bio *parent) 1516 { 1517 return -EIO; 1518 } 1519 1520 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) 1521 { 1522 } 1523 #endif 1524 1525 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 1526 1527 static struct dentry *zram_debugfs_root; 1528 1529 static void zram_debugfs_create(void) 1530 { 1531 zram_debugfs_root = debugfs_create_dir("zram", NULL); 1532 } 1533 1534 static void zram_debugfs_destroy(void) 1535 { 1536 debugfs_remove_recursive(zram_debugfs_root); 1537 } 1538 1539 static ssize_t read_block_state(struct file *file, char __user *buf, 1540 size_t count, loff_t *ppos) 1541 { 1542 char *kbuf; 1543 ssize_t index, written = 0; 1544 struct zram *zram = file->private_data; 1545 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 1546 1547 kbuf = kvmalloc(count, GFP_KERNEL); 1548 if (!kbuf) 1549 return -ENOMEM; 1550 
1551 guard(rwsem_read)(&zram->dev_lock); 1552 if (!init_done(zram)) { 1553 kvfree(kbuf); 1554 return -EINVAL; 1555 } 1556 1557 for (index = *ppos; index < nr_pages; index++) { 1558 int copied; 1559 1560 slot_lock(zram, index); 1561 if (!slot_allocated(zram, index)) 1562 goto next; 1563 1564 copied = snprintf(kbuf + written, count, 1565 "%12zd %12u.%06d %c%c%c%c%c%c\n", 1566 index, zram->table[index].attr.ac_time, 0, 1567 test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.', 1568 test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.', 1569 test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', 1570 test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', 1571 get_slot_comp_priority(zram, index) ? 'r' : '.', 1572 test_slot_flag(zram, index, 1573 ZRAM_INCOMPRESSIBLE) ? 'n' : '.'); 1574 1575 if (count <= copied) { 1576 slot_unlock(zram, index); 1577 break; 1578 } 1579 written += copied; 1580 count -= copied; 1581 next: 1582 slot_unlock(zram, index); 1583 *ppos += 1; 1584 } 1585 1586 if (copy_to_user(buf, kbuf, written)) 1587 written = -EFAULT; 1588 kvfree(kbuf); 1589 1590 return written; 1591 } 1592 1593 static const struct file_operations proc_zram_block_state_op = { 1594 .open = simple_open, 1595 .read = read_block_state, 1596 .llseek = default_llseek, 1597 }; 1598 1599 static void zram_debugfs_register(struct zram *zram) 1600 { 1601 if (!zram_debugfs_root) 1602 return; 1603 1604 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, 1605 zram_debugfs_root); 1606 debugfs_create_file("block_state", 0400, zram->debugfs_dir, 1607 zram, &proc_zram_block_state_op); 1608 } 1609 1610 static void zram_debugfs_unregister(struct zram *zram) 1611 { 1612 debugfs_remove_recursive(zram->debugfs_dir); 1613 } 1614 #else 1615 static void zram_debugfs_create(void) {}; 1616 static void zram_debugfs_destroy(void) {}; 1617 static void zram_debugfs_register(struct zram *zram) {}; 1618 static void zram_debugfs_unregister(struct zram *zram) {}; 1619 #endif 1620 1621 /* Only algo parameter given, 
   lookup by algo name */
static int lookup_algo_priority(struct zram *zram, const char *algo,
				u32 min_prio)
{
	s32 prio;

	/* Scan priorities starting at min_prio for a matching algo name */
	for (prio = min_prio; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		if (!strcmp(zram->comp_algs[prio], algo))
			return prio;
	}

	return -EINVAL;
}

/* Both algo and priority parameters given, validate them */
static int validate_algo_priority(struct zram *zram, const char *algo, u32 prio)
{
	if (prio >= ZRAM_MAX_COMPS)
		return -EINVAL;
	/* No algo at given priority */
	if (!zram->comp_algs[prio])
		return -EINVAL;
	/* A different algo at given priority */
	if (strcmp(zram->comp_algs[prio], algo))
		return -EINVAL;
	return 0;
}

static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	zram->comp_algs[prio] = alg;
}

/*
 * Set the compression algorithm for priority @prio.  Only allowed before
 * the device is initialized (disksize set).  @buf is resolved to the
 * backend's canonical name; returns 0 or a negative errno.
 */
static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	const char *alg;
	size_t sz;

	sz = strlen(buf);
	if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
		return -E2BIG;

	alg = zcomp_lookup_backend_name(buf);
	if (!alg)
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, alg);
	return 0;
}

/* Reset per-priority compression params (frees any loaded dictionary) */
static void comp_params_reset(struct zram *zram, u32 prio)
{
	struct zcomp_params *params = &zram->params[prio];

	vfree(params->dict);
	params->level = ZCOMP_PARAM_NOT_SET;
	params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
	params->dict_sz = 0;
	params->dict = NULL;
}

/*
 * Store compression params for priority @prio.  Optionally loads a
 * dictionary file from @dict_path.  Previous params (including any old
 * dictionary) are reset first.
 */
static int comp_params_store(struct zram *zram, u32 prio, s32 level,
			     const char *dict_path,
			     struct deflate_params *deflate_params)
{
	ssize_t sz = 0;

	comp_params_reset(zram, prio);

	if (dict_path) {
		sz = kernel_read_file_from_path(dict_path, 0,
						&zram->params[prio].dict,
						INT_MAX,
						NULL,
						READING_POLICY);
		if (sz < 0)
			return -EINVAL;
	}

	zram->params[prio].dict_sz = sz;
	zram->params[prio].level = level;
	zram->params[prio].deflate.winbits = deflate_params->winbits;
	return 0;
}

/*
 * Sysfs store: "priority=N level=N algo=NAME dict=PATH deflate.winbits=N".
 * The target priority is either given explicitly or looked up by algo
 * name; params can only be changed before device initialization.
 */
static ssize_t algorithm_params_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
	char *args, *param, *val, *algo = NULL, *dict_path = NULL;
	struct deflate_params deflate_params;
	struct zram *zram = dev_to_zram(dev);
	bool prio_param = false;
	int ret;

	deflate_params.winbits = ZCOMP_PARAM_NOT_SET;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "priority")) {
			prio_param = true;
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "level")) {
			ret = kstrtoint(val, 10, &level);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "dict")) {
			dict_path = val;
			continue;
		}

		if (!strcmp(param, "deflate.winbits")) {
			ret = kstrtoint(val, 10, &deflate_params.winbits);
			if (ret)
				return ret;
			continue;
		}
	}

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram))
		return -EBUSY;

	if (prio_param) {
		if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
			return -EINVAL;
	}

	/* Explicit priority + algo: the pair must match what's configured */
	if (algo && prio_param) {
		ret = validate_algo_priority(zram, algo, prio);
		if (ret)
			return ret;
	}

	/* Algo only: resolve its priority slot */
	if (algo && !prio_param) {
		prio = lookup_algo_priority(zram, algo, ZRAM_PRIMARY_COMP);
		if (prio < 0)
			return -EINVAL;
	}

	ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
	return ret ? ret : len;
}

static ssize_t comp_algorithm_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz;

	guard(rwsem_read)(&zram->dev_lock);
	sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf,
				    size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
	return ret ? ret : len;
}

#ifdef CONFIG_ZRAM_MULTI_COMP
/* List configured secondary (recompression) algorithms, one per priority */
static ssize_t recomp_algorithm_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz = 0;
	u32 prio;

	guard(rwsem_read)(&zram->dev_lock);
	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
		sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
	}
	return sz;
}

/* Sysfs store: "algo=NAME [priority=N]" for a secondary algorithm */
static ssize_t recomp_algorithm_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int prio = ZRAM_SECONDARY_COMP;
	char *args, *param, *val;
	char *alg = NULL;
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "algo")) {
			alg = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}
	}

	if (!alg)
		return -EINVAL;

	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = __comp_algorithm_store(zram, prio, alg);
	return ret ? ret : len;
}
#endif

static ssize_t compact_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	if (!init_done(zram))
		return -EINVAL;

	zs_compact(zram->mem_pool);

	return len;
}

static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	/* The literal 0 keeps the legacy column layout (removed counter) */
	guard(rwsem_read)(&zram->dev_lock);
	ret = sysfs_emit(buf,
			 "%8llu %8llu 0 %8llu\n",
			 (u64)atomic64_read(&zram->stats.failed_reads),
			 (u64)atomic64_read(&zram->stats.failed_writes),
			 (u64)atomic64_read(&zram->stats.notify_free));

	return ret;
}

static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	guard(rwsem_read)(&zram->dev_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = sysfs_emit(buf,
			 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			 orig_size << PAGE_SHIFT,
			 (u64)atomic64_read(&zram->stats.compr_data_size),
			 mem_used << PAGE_SHIFT,
			 zram->limit_pages << PAGE_SHIFT,
			 max_used << PAGE_SHIFT,
			 (u64)atomic64_read(&zram->stats.same_pages),
			 atomic_long_read(&pool_stats.pages_compacted),
			 (u64)atomic64_read(&zram->stats.huge_pages),
			 (u64)atomic64_read(&zram->stats.huge_pages_since));

	return ret;
}

static ssize_t debug_stat_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	ret = sysfs_emit(buf,
			 "version: %d\n0 %8llu\n",
			 version,
			 (u64)atomic64_read(&zram->stats.miss_free));

	return ret;
}

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	if (!zram->table)
		return;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		slot_free(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
	zram->table = NULL;
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages, index;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		zram->table = NULL;
		return false;
	}

	/* huge_class_size is shared across devices; computed once */
	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);

	for (index = 0; index < num_pages; index++)
		slot_lock_init(zram, index);

	return true;
}

/*
 * Release everything held by slot @index and reset it to the unallocated
 * state.  Caller is expected to hold the slot lock (not verifiable from
 * this hunk - the lock discipline is documented at the top of the file).
 */
static void slot_free(struct zram *zram, u32 index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].attr.ac_time = 0;
#endif

	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
	set_slot_comp_priority(zram, index, 0);

	if (test_slot_flag(zram, index, ZRAM_HUGE)) {
		/*
		 * Writeback completion decrements ->huge_pages but keeps
		 * ZRAM_HUGE flag for deferred decompression path.
		 */
		if (!test_slot_flag(zram, index, ZRAM_WB))
			atomic64_dec(&zram->stats.huge_pages);
		clear_slot_flag(zram, index, ZRAM_HUGE);
	}

	/* Written-back slot: the handle is a backing-device block index */
	if (test_slot_flag(zram, index, ZRAM_WB)) {
		clear_slot_flag(zram, index, ZRAM_WB);
		zram_release_bdev_block(zram, get_slot_handle(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (test_slot_flag(zram, index, ZRAM_SAME)) {
		clear_slot_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	/* Never-written slot: nothing to account or free */
	handle = get_slot_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(get_slot_size(zram, index),
		     &zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	set_slot_handle(zram, index, 0);
	set_slot_size(zram, index, 0);
}

/* Refill a same-filled page; the fill pattern is stored in the handle */
static int read_same_filled_page(struct zram *zram, struct page *page,
				 u32 index)
{
	void *mem;

	mem = kmap_local_page(page);
	zram_fill_page(mem, PAGE_SIZE, get_slot_handle(zram, index));
	kunmap_local(mem);
	return 0;
}

/* Copy an incompressible (stored raw, PAGE_SIZE) object into @page */
static int read_incompressible_page(struct zram *zram, struct page *page,
				    u32 index)
{
	unsigned long handle;
	void *src, *dst;

	handle = get_slot_handle(zram, index);
	src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL);
	dst = kmap_local_page(page);
	copy_page(dst, src);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src);

	return 0;
}

/* Decompress a regular compressed object into @page */
static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	int ret, prio;

	handle = get_slot_handle(zram, index);
	size = get_slot_size(zram, index);
	prio = get_slot_comp_priority(zram, index);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	dst = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	return ret;
}

#if defined CONFIG_ZRAM_WRITEBACK
/* Copy raw (still-compressed) object data into @page, no decompression */
static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src;

	handle = get_slot_handle(zram, index);
	size = get_slot_size(zram, index);

	/*
	 * We need to get stream just for ->local_copy buffer, in
	 * case if object spans two physical pages. No decompression
	 * takes place here, as we read raw compressed data.
	 */
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	memcpy_to_page(page, 0, src, size);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	return 0;
}
#endif

/*
 * Reads (decompresses if needed) a page from zspool (zsmalloc).
 * Corresponding ZRAM slot should be locked.
2137 */ 2138 static int read_from_zspool(struct zram *zram, struct page *page, u32 index) 2139 { 2140 if (test_slot_flag(zram, index, ZRAM_SAME) || 2141 !get_slot_handle(zram, index)) 2142 return read_same_filled_page(zram, page, index); 2143 2144 if (!test_slot_flag(zram, index, ZRAM_HUGE)) 2145 return read_compressed_page(zram, page, index); 2146 else 2147 return read_incompressible_page(zram, page, index); 2148 } 2149 2150 static int zram_read_page(struct zram *zram, struct page *page, u32 index, 2151 struct bio *parent) 2152 { 2153 int ret; 2154 2155 slot_lock(zram, index); 2156 if (!test_slot_flag(zram, index, ZRAM_WB)) { 2157 /* Slot should be locked through out the function call */ 2158 ret = read_from_zspool(zram, page, index); 2159 slot_unlock(zram, index); 2160 } else { 2161 unsigned long blk_idx = get_slot_handle(zram, index); 2162 2163 /* 2164 * The slot should be unlocked before reading from the backing 2165 * device. 2166 */ 2167 slot_unlock(zram, index); 2168 ret = read_from_bdev(zram, page, index, blk_idx, parent); 2169 } 2170 2171 /* Should NEVER happen. Return bio error if it does. */ 2172 if (WARN_ON(ret < 0)) 2173 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 2174 2175 return ret; 2176 } 2177 2178 /* 2179 * Use a temporary buffer to decompress the page, as the decompressor 2180 * always expects a full page for the output. 
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	/* Bounce page: decompress the whole page, then copy the sub-range */
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;
	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_read_partial(zram, bvec, index, offset);
	return zram_read_page(zram, bvec->bv_page, index, bio);
}

/*
 * Store a same-filled page: no zsmalloc allocation, the fill value is
 * kept directly in the slot handle.
 */
static int write_same_filled_page(struct zram *zram, unsigned long fill,
				  u32 index)
{
	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_SAME);
	set_slot_handle(zram, index, fill);
	slot_unlock(zram, index);

	atomic64_inc(&zram->stats.same_pages);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

/* Store a page that compressed to >= huge_class_size uncompressed (raw) */
static int write_incompressible_page(struct zram *zram, struct page *page,
				     u32 index)
{
	unsigned long handle;
	void *src;

	/*
	 * This function is called from preemptible context so we don't need
	 * to do optimistic and fallback to pessimistic handle allocation,
	 * like we do for compressible pages.
	 */
	handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle))
		return PTR_ERR((void *)handle);

	if (!zram_can_store_page(zram)) {
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	src = kmap_local_page(page);
	zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
	kunmap_local(src);

	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_HUGE);
	set_slot_handle(zram, index, handle);
	set_slot_size(zram, index, PAGE_SIZE);
	slot_unlock(zram, index);

	atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.huge_pages);
	atomic64_inc(&zram->stats.huge_pages_since);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

/*
 * Compress and store one page, dispatching to the same-filled or
 * incompressible paths as appropriate.  Frees any previous content of
 * the slot under the slot lock before installing the new object.
 */
static int zram_write_page(struct zram *zram, struct page *page, u32 index)
{
	int ret = 0;
	unsigned long handle;
	unsigned int comp_len;
	void *mem;
	struct zcomp_strm *zstrm;
	unsigned long element;
	bool same_filled;

	mem = kmap_local_page(page);
	same_filled = page_same_filled(mem, &element);
	kunmap_local(mem);
	if (same_filled)
		return write_same_filled_page(zram, element, index);

	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	mem = kmap_local_page(page);
	ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
			     mem, &comp_len);
	kunmap_local(mem);

	if (unlikely(ret)) {
		zcomp_stream_put(zstrm);
		pr_err("Compression failed! err=%d\n", ret);
		return ret;
	}

	if (comp_len >= huge_class_size) {
		zcomp_stream_put(zstrm);
		return write_incompressible_page(zram, page, index);
	}

	handle = zs_malloc(zram->mem_pool, comp_len,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle);
	}

	if (!zram_can_store_page(zram)) {
		zcomp_stream_put(zstrm);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
	zcomp_stream_put(zstrm);

	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_handle(zram, index, handle);
	set_slot_size(zram, index, comp_len);
	slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	atomic64_add(comp_len, &zram->stats.compr_data_size);

	return ret;
}

/*
 * This is a partial IO. Read the full page before writing the changes.
 */
static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
				   u32 index, int offset, struct bio *bio)
{
	/* Read-modify-write: decompress, patch the sub-range, re-store */
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = zram_read_page(zram, page, index, bio);
	if (!ret) {
		memcpy_from_bvec(page_address(page) + offset, bvec);
		ret = zram_write_page(zram, page, index);
	}
	__free_page(page);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
			   u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_write_partial(zram, bvec, index, offset, bio);
	return zram_write_page(zram, bvec->bv_page, index);
}

#ifdef CONFIG_ZRAM_MULTI_COMP
#define RECOMPRESS_IDLE		(1 << 0)
#define RECOMPRESS_HUGE		(1 << 1)

/* True if no algorithm is configured at a priority higher than @prio */
static bool highest_priority_algorithm(struct zram *zram, u32 prio)
{
	u32 p;

	for (p = prio + 1; p < ZRAM_MAX_COMPS; p++) {
		if (zram->comp_algs[p])
			return false;
	}

	return true;
}

/*
 * Collect recompression candidates into @ctl: allocated slots matching
 * @mode that are not written back, same-filled, already marked
 * incompressible, or already compressed at priority >= @prio.
 */
static void scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio,
				      struct zram_pp_ctl *ctl)
{
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index;

	for (index = 0; index < nr_pages; index++) {
		bool ok = true;

		slot_lock(zram, index);
		if (!slot_allocated(zram, index))
			goto next;

		if (mode & RECOMPRESS_IDLE &&
		    !test_slot_flag(zram, index, ZRAM_IDLE))
			goto next;

		if (mode & RECOMPRESS_HUGE &&
		    !test_slot_flag(zram, index, ZRAM_HUGE))
			goto next;

		if (test_slot_flag(zram, index, ZRAM_WB) ||
		    test_slot_flag(zram, index, ZRAM_SAME) ||
		    test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		/* Already compressed with same or higher priority */
		if (get_slot_comp_priority(zram, index) >= prio)
			goto next;

		ok = place_pp_slot(zram, ctl, index);
next:
		slot_unlock(zram, index);
		if (!ok)
			break;
	}
}

/*
 * This function will decompress (unless it's ZRAM_HUGE) the page and then
 * attempt to compress it using provided compression algorithm priority
 * (which is potentially more effective).
 *
 * Corresponding ZRAM slot should be locked.
 */
static int recompress_slot(struct zram *zram, u32 index, struct page *page,
			   u64 *num_recomp_pages, u32 threshold, u32 prio)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	void *src;
	int ret = 0;

	handle_old = get_slot_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = get_slot_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	/*
	 * We touched this entry so mark it as non-IDLE. This makes sure that
	 * we don't preserve IDLE flag and don't incorrectly pick this entry
	 * for different post-processing type (e.g. writeback).
	 */
	clear_slot_flag(zram, index, ZRAM_IDLE);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	src = kmap_local_page(page);
	ret = zcomp_compress(zram->comps[prio], zstrm, src, &comp_len_new);
	kunmap_local(src);

	/*
	 * Decrement the limit (if set) on pages we can recompress, even
	 * when current recompression was unsuccessful or did not compress
	 * the page below the threshold, because we still spent resources
	 * on it.
	 */
	if (*num_recomp_pages)
		*num_recomp_pages -= 1;

	if (ret) {
		zcomp_stream_put(zstrm);
		return ret;
	}

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
	class_index_new = zs_lookup_class_index(zram->mem_pool, comp_len_new);

	/* Compare size-class indexes: a smaller byte count only helps if it
	 * lands in a smaller zsmalloc class */
	if (class_index_new >= class_index_old ||
	    (threshold && comp_len_new >= threshold)) {
		zcomp_stream_put(zstrm);

		/*
		 * Secondary algorithms failed to re-compress the page
		 * in a way that would save memory.
		 *
		 * Mark the object incompressible if the max-priority (the
		 * last configured one) algorithm couldn't re-compress it.
		 */
		if (highest_priority_algorithm(zram, prio))
			set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/*
	 * We are holding per-CPU stream mutex and entry lock so better
	 * avoid direct reclaim. Allocation error is not fatal since
	 * we still have the old object in the mem_pool.
	 *
	 * XXX: technically, the node we really want here is the node that
	 * holds the original compressed data. But that would require us to
	 * modify zsmalloc API to return this information. For now, we will
	 * make do with the node of the page allocated for recompression.
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       GFP_NOIO | __GFP_NOWARN |
			       __GFP_HIGHMEM | __GFP_MOVABLE,
			       page_to_nid(page));
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle_new);
	}

	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zstrm);

	/* Swap in the new object; slot_free() also fixes up the old stats */
	slot_free(zram, index);
	set_slot_handle(zram, index, handle_new);
	set_slot_size(zram, index, comp_len_new);
	set_slot_comp_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

static ssize_t recompress_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char *args, *param, *val, *algo = NULL;
	u64 num_recomp_pages = ULLONG_MAX;
	struct zram_pp_ctl *ctl = NULL;
	s32 prio = ZRAM_SECONDARY_COMP;
	u32 mode = 0, threshold = 0;
	struct zram_pp_slot *pps;
	struct page *page = NULL;
	bool prio_param = false;
	ssize_t ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "type")) {
			if (!strcmp(val, "idle"))
				mode = RECOMPRESS_IDLE;
			if (!strcmp(val, "huge"))
				mode = RECOMPRESS_HUGE;
			if (!strcmp(val, "huge_idle"))
				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
			continue;
		}

		if (!strcmp(param, "max_pages")) {
			/*
			 * Limit the number of entries (pages) we attempt to
			 * recompress.
			 */
			ret = kstrtoull(val, 10, &num_recomp_pages);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "threshold")) {
			/*
			 * We will re-compress only idle objects equal or
			 * greater in size than watermark.
2567 */ 2568 ret = kstrtouint(val, 10, &threshold); 2569 if (ret) 2570 return ret; 2571 continue; 2572 } 2573 2574 if (!strcmp(param, "algo")) { 2575 algo = val; 2576 continue; 2577 } 2578 2579 if (!strcmp(param, "priority")) { 2580 prio_param = true; 2581 ret = kstrtoint(val, 10, &prio); 2582 if (ret) 2583 return ret; 2584 continue; 2585 } 2586 } 2587 2588 if (threshold >= huge_class_size) 2589 return -EINVAL; 2590 2591 guard(rwsem_write)(&zram->dev_lock); 2592 if (!init_done(zram)) 2593 return -EINVAL; 2594 2595 if (prio_param) { 2596 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) 2597 return -EINVAL; 2598 } 2599 2600 if (algo && prio_param) { 2601 ret = validate_algo_priority(zram, algo, prio); 2602 if (ret) 2603 return ret; 2604 } 2605 2606 if (algo && !prio_param) { 2607 prio = lookup_algo_priority(zram, algo, ZRAM_SECONDARY_COMP); 2608 if (prio < 0) 2609 return -EINVAL; 2610 } 2611 2612 if (!zram->comps[prio]) 2613 return -EINVAL; 2614 2615 page = alloc_page(GFP_KERNEL); 2616 if (!page) { 2617 ret = -ENOMEM; 2618 goto out; 2619 } 2620 2621 ctl = init_pp_ctl(); 2622 if (!ctl) { 2623 ret = -ENOMEM; 2624 goto out; 2625 } 2626 2627 scan_slots_for_recompress(zram, mode, prio, ctl); 2628 2629 ret = len; 2630 while ((pps = select_pp_slot(ctl))) { 2631 int err = 0; 2632 2633 if (!num_recomp_pages) 2634 break; 2635 2636 slot_lock(zram, pps->index); 2637 if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT)) 2638 goto next; 2639 2640 err = recompress_slot(zram, pps->index, page, 2641 &num_recomp_pages, threshold, prio); 2642 next: 2643 slot_unlock(zram, pps->index); 2644 release_pp_slot(zram, pps); 2645 2646 if (err) { 2647 ret = err; 2648 break; 2649 } 2650 2651 cond_resched(); 2652 } 2653 2654 out: 2655 if (page) 2656 __free_page(page); 2657 release_pp_ctl(zram, ctl); 2658 return ret; 2659 } 2660 #endif 2661 2662 static void zram_bio_discard(struct zram *zram, struct bio *bio) 2663 { 2664 size_t n = bio->bi_iter.bi_size; 2665 u32 index = 
bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2666 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2667 SECTOR_SHIFT; 2668 2669 /* 2670 * zram manages data in physical block size units. Because logical block 2671 * size isn't identical with physical block size on some arch, we 2672 * could get a discard request pointing to a specific offset within a 2673 * certain physical block. Although we can handle this request by 2674 * reading that physiclal block and decompressing and partially zeroing 2675 * and re-compressing and then re-storing it, this isn't reasonable 2676 * because our intent with a discard request is to save memory. So 2677 * skipping this logical block is appropriate here. 2678 */ 2679 if (offset) { 2680 if (n <= (PAGE_SIZE - offset)) 2681 return; 2682 2683 n -= (PAGE_SIZE - offset); 2684 index++; 2685 } 2686 2687 while (n >= PAGE_SIZE) { 2688 slot_lock(zram, index); 2689 slot_free(zram, index); 2690 slot_unlock(zram, index); 2691 atomic64_inc(&zram->stats.notify_free); 2692 index++; 2693 n -= PAGE_SIZE; 2694 } 2695 2696 bio_endio(bio); 2697 } 2698 2699 static void zram_bio_read(struct zram *zram, struct bio *bio) 2700 { 2701 unsigned long start_time = bio_start_io_acct(bio); 2702 struct bvec_iter iter = bio->bi_iter; 2703 2704 do { 2705 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2706 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2707 SECTOR_SHIFT; 2708 struct bio_vec bv = bio_iter_iovec(bio, iter); 2709 2710 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2711 2712 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { 2713 atomic64_inc(&zram->stats.failed_reads); 2714 bio->bi_status = BLK_STS_IOERR; 2715 break; 2716 } 2717 flush_dcache_page(bv.bv_page); 2718 2719 slot_lock(zram, index); 2720 mark_slot_accessed(zram, index); 2721 slot_unlock(zram, index); 2722 2723 bio_advance_iter_single(bio, &iter, bv.bv_len); 2724 } while (iter.bi_size); 2725 2726 bio_end_io_acct(bio, start_time); 2727 
bio_endio(bio); 2728 } 2729 2730 static void zram_bio_write(struct zram *zram, struct bio *bio) 2731 { 2732 unsigned long start_time = bio_start_io_acct(bio); 2733 struct bvec_iter iter = bio->bi_iter; 2734 2735 do { 2736 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2737 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2738 SECTOR_SHIFT; 2739 struct bio_vec bv = bio_iter_iovec(bio, iter); 2740 2741 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2742 2743 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { 2744 atomic64_inc(&zram->stats.failed_writes); 2745 bio->bi_status = BLK_STS_IOERR; 2746 break; 2747 } 2748 2749 slot_lock(zram, index); 2750 mark_slot_accessed(zram, index); 2751 slot_unlock(zram, index); 2752 2753 bio_advance_iter_single(bio, &iter, bv.bv_len); 2754 } while (iter.bi_size); 2755 2756 bio_end_io_acct(bio, start_time); 2757 bio_endio(bio); 2758 } 2759 2760 /* 2761 * Handler function for all zram I/O requests. 2762 */ 2763 static void zram_submit_bio(struct bio *bio) 2764 { 2765 struct zram *zram = bio->bi_bdev->bd_disk->private_data; 2766 2767 switch (bio_op(bio)) { 2768 case REQ_OP_READ: 2769 zram_bio_read(zram, bio); 2770 break; 2771 case REQ_OP_WRITE: 2772 zram_bio_write(zram, bio); 2773 break; 2774 case REQ_OP_DISCARD: 2775 case REQ_OP_WRITE_ZEROES: 2776 zram_bio_discard(zram, bio); 2777 break; 2778 default: 2779 WARN_ON_ONCE(1); 2780 bio_endio(bio); 2781 } 2782 } 2783 2784 static void zram_slot_free_notify(struct block_device *bdev, 2785 unsigned long index) 2786 { 2787 struct zram *zram; 2788 2789 zram = bdev->bd_disk->private_data; 2790 2791 atomic64_inc(&zram->stats.notify_free); 2792 if (!slot_trylock(zram, index)) { 2793 atomic64_inc(&zram->stats.miss_free); 2794 return; 2795 } 2796 2797 slot_free(zram, index); 2798 slot_unlock(zram, index); 2799 } 2800 2801 static void zram_comp_params_reset(struct zram *zram) 2802 { 2803 u32 prio; 2804 2805 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 
2806 comp_params_reset(zram, prio); 2807 } 2808 } 2809 2810 static void zram_destroy_comps(struct zram *zram) 2811 { 2812 u32 prio; 2813 2814 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2815 struct zcomp *comp = zram->comps[prio]; 2816 2817 zram->comps[prio] = NULL; 2818 if (!comp) 2819 continue; 2820 zcomp_destroy(comp); 2821 } 2822 2823 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) 2824 zram->comp_algs[prio] = NULL; 2825 2826 zram_comp_params_reset(zram); 2827 } 2828 2829 static void zram_reset_device(struct zram *zram) 2830 { 2831 guard(rwsem_write)(&zram->dev_lock); 2832 2833 zram->limit_pages = 0; 2834 2835 set_capacity_and_notify(zram->disk, 0); 2836 part_stat_set_all(zram->disk->part0, 0); 2837 2838 /* I/O operation under all of CPU are done so let's free */ 2839 zram_meta_free(zram, zram->disksize); 2840 zram->disksize = 0; 2841 zram_destroy_comps(zram); 2842 memset(&zram->stats, 0, sizeof(zram->stats)); 2843 reset_bdev(zram); 2844 2845 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 2846 } 2847 2848 static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, 2849 const char *buf, size_t len) 2850 { 2851 u64 disksize; 2852 struct zcomp *comp; 2853 struct zram *zram = dev_to_zram(dev); 2854 int err; 2855 u32 prio; 2856 2857 disksize = memparse(buf, NULL); 2858 if (!disksize) 2859 return -EINVAL; 2860 2861 guard(rwsem_write)(&zram->dev_lock); 2862 if (init_done(zram)) { 2863 pr_info("Cannot change disksize for initialized device\n"); 2864 return -EBUSY; 2865 } 2866 2867 disksize = PAGE_ALIGN(disksize); 2868 if (!zram_meta_alloc(zram, disksize)) 2869 return -ENOMEM; 2870 2871 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2872 if (!zram->comp_algs[prio]) 2873 continue; 2874 2875 comp = zcomp_create(zram->comp_algs[prio], 2876 &zram->params[prio]); 2877 if (IS_ERR(comp)) { 2878 pr_err("Cannot initialise %s compressing backend\n", 2879 zram->comp_algs[prio]); 2880 err = 
PTR_ERR(comp); 2881 goto out_free_comps; 2882 } 2883 2884 zram->comps[prio] = comp; 2885 } 2886 zram->disksize = disksize; 2887 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); 2888 2889 return len; 2890 2891 out_free_comps: 2892 zram_destroy_comps(zram); 2893 zram_meta_free(zram, disksize); 2894 return err; 2895 } 2896 2897 static ssize_t reset_store(struct device *dev, 2898 struct device_attribute *attr, const char *buf, size_t len) 2899 { 2900 int ret; 2901 unsigned short do_reset; 2902 struct zram *zram; 2903 struct gendisk *disk; 2904 2905 ret = kstrtou16(buf, 10, &do_reset); 2906 if (ret) 2907 return ret; 2908 2909 if (!do_reset) 2910 return -EINVAL; 2911 2912 zram = dev_to_zram(dev); 2913 disk = zram->disk; 2914 2915 mutex_lock(&disk->open_mutex); 2916 /* Do not reset an active device or claimed device */ 2917 if (disk_openers(disk) || zram->claim) { 2918 mutex_unlock(&disk->open_mutex); 2919 return -EBUSY; 2920 } 2921 2922 /* From now on, anyone can't open /dev/zram[0-9] */ 2923 zram->claim = true; 2924 mutex_unlock(&disk->open_mutex); 2925 2926 /* Make sure all the pending I/O are finished */ 2927 sync_blockdev(disk->part0); 2928 zram_reset_device(zram); 2929 2930 mutex_lock(&disk->open_mutex); 2931 zram->claim = false; 2932 mutex_unlock(&disk->open_mutex); 2933 2934 return len; 2935 } 2936 2937 static int zram_open(struct gendisk *disk, blk_mode_t mode) 2938 { 2939 struct zram *zram = disk->private_data; 2940 2941 WARN_ON(!mutex_is_locked(&disk->open_mutex)); 2942 2943 /* zram was claimed to reset so open request fails */ 2944 if (zram->claim) 2945 return -EBUSY; 2946 return 0; 2947 } 2948 2949 static const struct block_device_operations zram_devops = { 2950 .open = zram_open, 2951 .submit_bio = zram_submit_bio, 2952 .swap_slot_free_notify = zram_slot_free_notify, 2953 .owner = THIS_MODULE 2954 }; 2955 2956 static DEVICE_ATTR_RO(io_stat); 2957 static DEVICE_ATTR_RO(mm_stat); 2958 static DEVICE_ATTR_RO(debug_stat); 2959 static 
DEVICE_ATTR_WO(compact); 2960 static DEVICE_ATTR_RW(disksize); 2961 static DEVICE_ATTR_RO(initstate); 2962 static DEVICE_ATTR_WO(reset); 2963 static DEVICE_ATTR_WO(mem_limit); 2964 static DEVICE_ATTR_WO(mem_used_max); 2965 static DEVICE_ATTR_WO(idle); 2966 static DEVICE_ATTR_RW(comp_algorithm); 2967 #ifdef CONFIG_ZRAM_WRITEBACK 2968 static DEVICE_ATTR_RO(bd_stat); 2969 static DEVICE_ATTR_RW(backing_dev); 2970 static DEVICE_ATTR_WO(writeback); 2971 static DEVICE_ATTR_RW(writeback_limit); 2972 static DEVICE_ATTR_RW(writeback_limit_enable); 2973 static DEVICE_ATTR_RW(writeback_batch_size); 2974 static DEVICE_ATTR_RW(compressed_writeback); 2975 #endif 2976 #ifdef CONFIG_ZRAM_MULTI_COMP 2977 static DEVICE_ATTR_RW(recomp_algorithm); 2978 static DEVICE_ATTR_WO(recompress); 2979 #endif 2980 static DEVICE_ATTR_WO(algorithm_params); 2981 2982 static struct attribute *zram_disk_attrs[] = { 2983 &dev_attr_disksize.attr, 2984 &dev_attr_initstate.attr, 2985 &dev_attr_reset.attr, 2986 &dev_attr_compact.attr, 2987 &dev_attr_mem_limit.attr, 2988 &dev_attr_mem_used_max.attr, 2989 &dev_attr_idle.attr, 2990 &dev_attr_comp_algorithm.attr, 2991 #ifdef CONFIG_ZRAM_WRITEBACK 2992 &dev_attr_bd_stat.attr, 2993 &dev_attr_backing_dev.attr, 2994 &dev_attr_writeback.attr, 2995 &dev_attr_writeback_limit.attr, 2996 &dev_attr_writeback_limit_enable.attr, 2997 &dev_attr_writeback_batch_size.attr, 2998 &dev_attr_compressed_writeback.attr, 2999 #endif 3000 &dev_attr_io_stat.attr, 3001 &dev_attr_mm_stat.attr, 3002 &dev_attr_debug_stat.attr, 3003 #ifdef CONFIG_ZRAM_MULTI_COMP 3004 &dev_attr_recomp_algorithm.attr, 3005 &dev_attr_recompress.attr, 3006 #endif 3007 &dev_attr_algorithm_params.attr, 3008 NULL, 3009 }; 3010 3011 ATTRIBUTE_GROUPS(zram_disk); 3012 3013 /* 3014 * Allocate and initialize new zram device. the function returns 3015 * '>= 0' device_id upon success, and negative value otherwise. 
3016 */ 3017 static int zram_add(void) 3018 { 3019 struct queue_limits lim = { 3020 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, 3021 /* 3022 * To ensure that we always get PAGE_SIZE aligned and 3023 * n*PAGE_SIZED sized I/O requests. 3024 */ 3025 .physical_block_size = PAGE_SIZE, 3026 .io_min = PAGE_SIZE, 3027 .io_opt = PAGE_SIZE, 3028 .max_hw_discard_sectors = UINT_MAX, 3029 /* 3030 * zram_bio_discard() will clear all logical blocks if logical 3031 * block size is identical with physical block size(PAGE_SIZE). 3032 * But if it is different, we will skip discarding some parts of 3033 * logical blocks in the part of the request range which isn't 3034 * aligned to physical block size. So we can't ensure that all 3035 * discarded logical blocks are zeroed. 3036 */ 3037 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE 3038 .max_write_zeroes_sectors = UINT_MAX, 3039 #endif 3040 .features = BLK_FEAT_STABLE_WRITES | 3041 BLK_FEAT_SYNCHRONOUS, 3042 }; 3043 struct zram *zram; 3044 int ret, device_id; 3045 3046 zram = kzalloc_obj(struct zram); 3047 if (!zram) 3048 return -ENOMEM; 3049 3050 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 3051 if (ret < 0) 3052 goto out_free_dev; 3053 device_id = ret; 3054 3055 init_rwsem(&zram->dev_lock); 3056 #ifdef CONFIG_ZRAM_WRITEBACK 3057 zram->wb_batch_size = 32; 3058 zram->compressed_wb = false; 3059 #endif 3060 3061 /* gendisk structure */ 3062 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); 3063 if (IS_ERR(zram->disk)) { 3064 pr_err("Error allocating disk structure for device %d\n", 3065 device_id); 3066 ret = PTR_ERR(zram->disk); 3067 goto out_free_idr; 3068 } 3069 3070 zram->disk->major = zram_major; 3071 zram->disk->first_minor = device_id; 3072 zram->disk->minors = 1; 3073 zram->disk->flags |= GENHD_FL_NO_PART; 3074 zram->disk->fops = &zram_devops; 3075 zram->disk->private_data = zram; 3076 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 3077 zram_comp_params_reset(zram); 3078 comp_algorithm_set(zram, 
ZRAM_PRIMARY_COMP, default_compressor); 3079 3080 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */ 3081 set_capacity(zram->disk, 0); 3082 ret = device_add_disk(NULL, zram->disk, zram_disk_groups); 3083 if (ret) 3084 goto out_cleanup_disk; 3085 3086 zram_debugfs_register(zram); 3087 pr_info("Added device: %s\n", zram->disk->disk_name); 3088 return device_id; 3089 3090 out_cleanup_disk: 3091 put_disk(zram->disk); 3092 out_free_idr: 3093 idr_remove(&zram_index_idr, device_id); 3094 out_free_dev: 3095 kfree(zram); 3096 return ret; 3097 } 3098 3099 static int zram_remove(struct zram *zram) 3100 { 3101 bool claimed; 3102 3103 mutex_lock(&zram->disk->open_mutex); 3104 if (disk_openers(zram->disk)) { 3105 mutex_unlock(&zram->disk->open_mutex); 3106 return -EBUSY; 3107 } 3108 3109 claimed = zram->claim; 3110 if (!claimed) 3111 zram->claim = true; 3112 mutex_unlock(&zram->disk->open_mutex); 3113 3114 zram_debugfs_unregister(zram); 3115 3116 if (claimed) { 3117 /* 3118 * If we were claimed by reset_store(), del_gendisk() will 3119 * wait until reset_store() is done, so nothing need to do. 3120 */ 3121 ; 3122 } else { 3123 /* Make sure all the pending I/O are finished */ 3124 sync_blockdev(zram->disk->part0); 3125 zram_reset_device(zram); 3126 } 3127 3128 pr_info("Removed device: %s\n", zram->disk->disk_name); 3129 3130 del_gendisk(zram->disk); 3131 3132 /* del_gendisk drains pending reset_store */ 3133 WARN_ON_ONCE(claimed && zram->claim); 3134 3135 /* 3136 * disksize_store() may be called in between zram_reset_device() 3137 * and del_gendisk(), so run the last reset to avoid leaking 3138 * anything allocated with disksize_store() 3139 */ 3140 zram_reset_device(zram); 3141 3142 put_disk(zram->disk); 3143 kfree(zram); 3144 return 0; 3145 } 3146 3147 /* zram-control sysfs attributes */ 3148 3149 /* 3150 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. 
In a 3151 * sense that reading from this file does alter the state of your system -- it 3152 * creates a new un-initialized zram device and returns back this device's 3153 * device_id (or an error code if it fails to create a new device). 3154 */ 3155 static ssize_t hot_add_show(const struct class *class, 3156 const struct class_attribute *attr, 3157 char *buf) 3158 { 3159 int ret; 3160 3161 mutex_lock(&zram_index_mutex); 3162 ret = zram_add(); 3163 mutex_unlock(&zram_index_mutex); 3164 3165 if (ret < 0) 3166 return ret; 3167 return sysfs_emit(buf, "%d\n", ret); 3168 } 3169 /* This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used */ 3170 static struct class_attribute class_attr_hot_add = 3171 __ATTR(hot_add, 0400, hot_add_show, NULL); 3172 3173 static ssize_t hot_remove_store(const struct class *class, 3174 const struct class_attribute *attr, 3175 const char *buf, 3176 size_t count) 3177 { 3178 struct zram *zram; 3179 int ret, dev_id; 3180 3181 /* dev_id is gendisk->first_minor, which is `int' */ 3182 ret = kstrtoint(buf, 10, &dev_id); 3183 if (ret) 3184 return ret; 3185 if (dev_id < 0) 3186 return -EINVAL; 3187 3188 mutex_lock(&zram_index_mutex); 3189 3190 zram = idr_find(&zram_index_idr, dev_id); 3191 if (zram) { 3192 ret = zram_remove(zram); 3193 if (!ret) 3194 idr_remove(&zram_index_idr, dev_id); 3195 } else { 3196 ret = -ENODEV; 3197 } 3198 3199 mutex_unlock(&zram_index_mutex); 3200 return ret ? 
ret : count; 3201 } 3202 static CLASS_ATTR_WO(hot_remove); 3203 3204 static struct attribute *zram_control_class_attrs[] = { 3205 &class_attr_hot_add.attr, 3206 &class_attr_hot_remove.attr, 3207 NULL, 3208 }; 3209 ATTRIBUTE_GROUPS(zram_control_class); 3210 3211 static struct class zram_control_class = { 3212 .name = "zram-control", 3213 .class_groups = zram_control_class_groups, 3214 }; 3215 3216 static int zram_remove_cb(int id, void *ptr, void *data) 3217 { 3218 WARN_ON_ONCE(zram_remove(ptr)); 3219 return 0; 3220 } 3221 3222 static void destroy_devices(void) 3223 { 3224 class_unregister(&zram_control_class); 3225 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 3226 zram_debugfs_destroy(); 3227 idr_destroy(&zram_index_idr); 3228 unregister_blkdev(zram_major, "zram"); 3229 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3230 } 3231 3232 static int __init zram_init(void) 3233 { 3234 struct zram_table_entry zram_te; 3235 int ret; 3236 3237 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8); 3238 3239 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 3240 zcomp_cpu_up_prepare, zcomp_cpu_dead); 3241 if (ret < 0) 3242 return ret; 3243 3244 ret = class_register(&zram_control_class); 3245 if (ret) { 3246 pr_err("Unable to register zram-control class\n"); 3247 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3248 return ret; 3249 } 3250 3251 zram_debugfs_create(); 3252 zram_major = register_blkdev(0, "zram"); 3253 if (zram_major <= 0) { 3254 pr_err("Unable to get major number\n"); 3255 class_unregister(&zram_control_class); 3256 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3257 return -EBUSY; 3258 } 3259 3260 while (num_devices != 0) { 3261 mutex_lock(&zram_index_mutex); 3262 ret = zram_add(); 3263 mutex_unlock(&zram_index_mutex); 3264 if (ret < 0) 3265 goto out_error; 3266 num_devices--; 3267 } 3268 3269 return 0; 3270 3271 out_error: 3272 destroy_devices(); 3273 return ret; 3274 } 3275 3276 static void __exit zram_exit(void) 
3277 { 3278 destroy_devices(); 3279 } 3280 3281 module_init(zram_init); 3282 module_exit(zram_exit); 3283 3284 module_param(num_devices, uint, 0); 3285 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 3286 3287 MODULE_LICENSE("Dual BSD/GPL"); 3288 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 3289 MODULE_DESCRIPTION("Compressed RAM Block Device"); 3290