1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define pr_fmt(fmt) "zram: " fmt 16 17 #include <linux/module.h> 18 #include <linux/kernel.h> 19 #include <linux/bio.h> 20 #include <linux/bitops.h> 21 #include <linux/blkdev.h> 22 #include <linux/buffer_head.h> 23 #include <linux/device.h> 24 #include <linux/highmem.h> 25 #include <linux/slab.h> 26 #include <linux/backing-dev.h> 27 #include <linux/string.h> 28 #include <linux/vmalloc.h> 29 #include <linux/err.h> 30 #include <linux/idr.h> 31 #include <linux/sysfs.h> 32 #include <linux/debugfs.h> 33 #include <linux/cpuhotplug.h> 34 #include <linux/part_stat.h> 35 #include <linux/kernel_read_file.h> 36 #include <linux/rcupdate.h> 37 38 #include "zram_drv.h" 39 40 static DEFINE_IDR(zram_index_idr); 41 /* idr index must be protected */ 42 static DEFINE_MUTEX(zram_index_mutex); 43 44 static int zram_major; 45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; 46 47 #define ZRAM_MAX_ALGO_NAME_SZ 128 48 49 /* Module params (documentation at end) */ 50 static unsigned int num_devices = 1; 51 /* 52 * Pages that compress to sizes equals or greater than this are stored 53 * uncompressed in memory. 
54 */ 55 static size_t huge_class_size; 56 57 static const struct block_device_operations zram_devops; 58 59 static void slot_free(struct zram *zram, u32 index); 60 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map) 61 62 static void slot_lock_init(struct zram *zram, u32 index) 63 { 64 static struct lock_class_key __key; 65 66 lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock", 67 &__key, 0); 68 } 69 70 /* 71 * entry locking rules: 72 * 73 * 1) Lock is exclusive 74 * 75 * 2) lock() function can sleep waiting for the lock 76 * 77 * 3) Lock owner can sleep 78 * 79 * 4) Use TRY lock variant when in atomic context 80 * - must check return value and handle locking failers 81 */ 82 static __must_check bool slot_trylock(struct zram *zram, u32 index) 83 { 84 unsigned long *lock = &zram->table[index].__lock; 85 86 if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) { 87 mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_); 88 lock_acquired(slot_dep_map(zram, index), _RET_IP_); 89 return true; 90 } 91 92 return false; 93 } 94 95 static void slot_lock(struct zram *zram, u32 index) 96 { 97 unsigned long *lock = &zram->table[index].__lock; 98 99 mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_); 100 wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE); 101 lock_acquired(slot_dep_map(zram, index), _RET_IP_); 102 } 103 104 static void slot_unlock(struct zram *zram, u32 index) 105 { 106 unsigned long *lock = &zram->table[index].__lock; 107 108 mutex_release(slot_dep_map(zram, index), _RET_IP_); 109 clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock); 110 } 111 112 static inline bool init_done(struct zram *zram) 113 { 114 return zram->disksize; 115 } 116 117 static inline struct zram *dev_to_zram(struct device *dev) 118 { 119 return (struct zram *)dev_to_disk(dev)->private_data; 120 } 121 122 static unsigned long get_slot_handle(struct zram *zram, u32 index) 123 { 124 return zram->table[index].handle; 125 } 126 127 static void 
set_slot_handle(struct zram *zram, u32 index, unsigned long handle) 128 { 129 zram->table[index].handle = handle; 130 } 131 132 static bool test_slot_flag(struct zram *zram, u32 index, 133 enum zram_pageflags flag) 134 { 135 return zram->table[index].attr.flags & BIT(flag); 136 } 137 138 static void set_slot_flag(struct zram *zram, u32 index, 139 enum zram_pageflags flag) 140 { 141 zram->table[index].attr.flags |= BIT(flag); 142 } 143 144 static void clear_slot_flag(struct zram *zram, u32 index, 145 enum zram_pageflags flag) 146 { 147 zram->table[index].attr.flags &= ~BIT(flag); 148 } 149 150 static size_t get_slot_size(struct zram *zram, u32 index) 151 { 152 return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1); 153 } 154 155 static void set_slot_size(struct zram *zram, u32 index, size_t size) 156 { 157 unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT; 158 159 zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size; 160 } 161 162 static inline bool slot_allocated(struct zram *zram, u32 index) 163 { 164 return get_slot_size(zram, index) || 165 test_slot_flag(zram, index, ZRAM_SAME) || 166 test_slot_flag(zram, index, ZRAM_WB); 167 } 168 169 static inline void set_slot_comp_priority(struct zram *zram, u32 index, 170 u32 prio) 171 { 172 prio &= ZRAM_COMP_PRIORITY_MASK; 173 /* 174 * Clear previous priority value first, in case if we recompress 175 * further an already recompressed page 176 */ 177 zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK << 178 ZRAM_COMP_PRIORITY_BIT1); 179 zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); 180 } 181 182 static inline u32 get_slot_comp_priority(struct zram *zram, u32 index) 183 { 184 u32 prio = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1; 185 186 return prio & ZRAM_COMP_PRIORITY_MASK; 187 } 188 189 static void mark_slot_accessed(struct zram *zram, u32 index) 190 { 191 clear_slot_flag(zram, index, ZRAM_IDLE); 192 clear_slot_flag(zram, index, 
ZRAM_PP_SLOT); 193 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 194 zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds(); 195 #endif 196 } 197 198 static inline void update_used_max(struct zram *zram, const unsigned long pages) 199 { 200 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); 201 202 do { 203 if (cur_max >= pages) 204 return; 205 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, 206 &cur_max, pages)); 207 } 208 209 static bool zram_can_store_page(struct zram *zram) 210 { 211 unsigned long alloced_pages; 212 213 alloced_pages = zs_get_total_pages(zram->mem_pool); 214 update_used_max(zram, alloced_pages); 215 216 return !zram->limit_pages || alloced_pages <= zram->limit_pages; 217 } 218 219 #if PAGE_SIZE != 4096 220 static inline bool is_partial_io(struct bio_vec *bvec) 221 { 222 return bvec->bv_len != PAGE_SIZE; 223 } 224 #define ZRAM_PARTIAL_IO 1 225 #else 226 static inline bool is_partial_io(struct bio_vec *bvec) 227 { 228 return false; 229 } 230 #endif 231 232 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP 233 struct zram_pp_slot { 234 unsigned long index; 235 struct list_head entry; 236 }; 237 238 /* 239 * A post-processing bucket is, essentially, a size class, this defines 240 * the range (in bytes) of pp-slots sizes in particular bucket. 
241 */ 242 #define PP_BUCKET_SIZE_RANGE 64 243 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1) 244 245 struct zram_pp_ctl { 246 struct list_head pp_buckets[NUM_PP_BUCKETS]; 247 }; 248 249 static struct zram_pp_ctl *init_pp_ctl(void) 250 { 251 struct zram_pp_ctl *ctl; 252 u32 idx; 253 254 ctl = kmalloc_obj(*ctl); 255 if (!ctl) 256 return NULL; 257 258 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) 259 INIT_LIST_HEAD(&ctl->pp_buckets[idx]); 260 return ctl; 261 } 262 263 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps) 264 { 265 list_del_init(&pps->entry); 266 267 slot_lock(zram, pps->index); 268 clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT); 269 slot_unlock(zram, pps->index); 270 271 kfree(pps); 272 } 273 274 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl) 275 { 276 u32 idx; 277 278 if (!ctl) 279 return; 280 281 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) { 282 while (!list_empty(&ctl->pp_buckets[idx])) { 283 struct zram_pp_slot *pps; 284 285 pps = list_first_entry(&ctl->pp_buckets[idx], 286 struct zram_pp_slot, 287 entry); 288 release_pp_slot(zram, pps); 289 } 290 } 291 292 kfree(ctl); 293 } 294 295 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl, 296 u32 index) 297 { 298 struct zram_pp_slot *pps; 299 u32 bid; 300 301 pps = kmalloc_obj(*pps, GFP_NOIO | __GFP_NOWARN); 302 if (!pps) 303 return false; 304 305 INIT_LIST_HEAD(&pps->entry); 306 pps->index = index; 307 308 bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; 309 list_add(&pps->entry, &ctl->pp_buckets[bid]); 310 311 set_slot_flag(zram, pps->index, ZRAM_PP_SLOT); 312 return true; 313 } 314 315 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl) 316 { 317 struct zram_pp_slot *pps = NULL; 318 s32 idx = NUM_PP_BUCKETS - 1; 319 320 /* The higher the bucket id the more optimal slot post-processing is */ 321 while (idx >= 0) { 322 pps = list_first_entry_or_null(&ctl->pp_buckets[idx], 323 struct zram_pp_slot, 
324 entry); 325 if (pps) 326 break; 327 328 idx--; 329 } 330 return pps; 331 } 332 #endif 333 334 static inline void zram_fill_page(void *ptr, unsigned long len, 335 unsigned long value) 336 { 337 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 338 memset_l(ptr, value, len / sizeof(unsigned long)); 339 } 340 341 static bool page_same_filled(void *ptr, unsigned long *element) 342 { 343 unsigned long *page; 344 unsigned long val; 345 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 346 347 page = (unsigned long *)ptr; 348 val = page[0]; 349 350 if (val != page[last_pos]) 351 return false; 352 353 for (pos = 1; pos < last_pos; pos++) { 354 if (val != page[pos]) 355 return false; 356 } 357 358 *element = val; 359 360 return true; 361 } 362 363 static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, 364 char *buf) 365 { 366 u32 val; 367 struct zram *zram = dev_to_zram(dev); 368 369 guard(rwsem_read)(&zram->dev_lock); 370 val = init_done(zram); 371 372 return sysfs_emit(buf, "%u\n", val); 373 } 374 375 static ssize_t disksize_show(struct device *dev, 376 struct device_attribute *attr, char *buf) 377 { 378 struct zram *zram = dev_to_zram(dev); 379 380 return sysfs_emit(buf, "%llu\n", zram->disksize); 381 } 382 383 static ssize_t mem_limit_store(struct device *dev, 384 struct device_attribute *attr, const char *buf, 385 size_t len) 386 { 387 u64 limit; 388 char *tmp; 389 struct zram *zram = dev_to_zram(dev); 390 391 limit = memparse(buf, &tmp); 392 if (buf == tmp) /* no chars parsed, invalid input */ 393 return -EINVAL; 394 395 guard(rwsem_write)(&zram->dev_lock); 396 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 397 398 return len; 399 } 400 401 static ssize_t mem_used_max_store(struct device *dev, 402 struct device_attribute *attr, 403 const char *buf, size_t len) 404 { 405 int err; 406 unsigned long val; 407 struct zram *zram = dev_to_zram(dev); 408 409 err = kstrtoul(buf, 10, &val); 410 if (err || val != 0) 411 return 
-EINVAL; 412 413 guard(rwsem_read)(&zram->dev_lock); 414 if (init_done(zram)) { 415 atomic_long_set(&zram->stats.max_used_pages, 416 zs_get_total_pages(zram->mem_pool)); 417 } 418 419 return len; 420 } 421 422 /* 423 * Mark all pages which are older than or equal to cutoff as IDLE. 424 * Callers should hold the zram init lock in read mode 425 */ 426 static void mark_idle(struct zram *zram, ktime_t cutoff) 427 { 428 int is_idle = 1; 429 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 430 int index; 431 432 for (index = 0; index < nr_pages; index++) { 433 /* 434 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no 435 * post-processing (recompress, writeback) happens to the 436 * ZRAM_SAME slot. 437 * 438 * And ZRAM_WB slots simply cannot be ZRAM_IDLE. 439 */ 440 slot_lock(zram, index); 441 if (!slot_allocated(zram, index) || 442 test_slot_flag(zram, index, ZRAM_WB) || 443 test_slot_flag(zram, index, ZRAM_SAME)) { 444 slot_unlock(zram, index); 445 continue; 446 } 447 448 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 449 is_idle = !cutoff || 450 ktime_after(cutoff, zram->table[index].attr.ac_time); 451 #endif 452 if (is_idle) 453 set_slot_flag(zram, index, ZRAM_IDLE); 454 else 455 clear_slot_flag(zram, index, ZRAM_IDLE); 456 slot_unlock(zram, index); 457 } 458 } 459 460 static ssize_t idle_store(struct device *dev, struct device_attribute *attr, 461 const char *buf, size_t len) 462 { 463 struct zram *zram = dev_to_zram(dev); 464 ktime_t cutoff = 0; 465 466 if (!sysfs_streq(buf, "all")) { 467 /* 468 * If it did not parse as 'all' try to treat it as an integer 469 * when we have memory tracking enabled. 
470 */ 471 u32 age_sec; 472 473 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && 474 !kstrtouint(buf, 0, &age_sec)) 475 cutoff = ktime_sub((u32)ktime_get_boottime_seconds(), 476 age_sec); 477 else 478 return -EINVAL; 479 } 480 481 guard(rwsem_read)(&zram->dev_lock); 482 if (!init_done(zram)) 483 return -EINVAL; 484 485 /* 486 * A cutoff of 0 marks everything as idle, this is the 487 * "all" behavior. 488 */ 489 mark_idle(zram, cutoff); 490 return len; 491 } 492 493 #ifdef CONFIG_ZRAM_WRITEBACK 494 #define INVALID_BDEV_BLOCK (~0UL) 495 496 static int read_from_zspool_raw(struct zram *zram, struct page *page, 497 u32 index); 498 static int read_from_zspool(struct zram *zram, struct page *page, u32 index); 499 500 struct zram_wb_ctl { 501 /* idle list is accessed only by the writeback task, no concurency */ 502 struct list_head idle_reqs; 503 /* done list is accessed concurrently, protect by done_lock */ 504 struct list_head done_reqs; 505 wait_queue_head_t done_wait; 506 spinlock_t done_lock; 507 atomic_t num_inflight; 508 struct rcu_head rcu; 509 }; 510 511 struct zram_wb_req { 512 unsigned long blk_idx; 513 struct page *page; 514 struct zram_pp_slot *pps; 515 struct bio_vec bio_vec; 516 struct bio bio; 517 518 struct list_head entry; 519 }; 520 521 struct zram_rb_req { 522 struct work_struct work; 523 struct zram *zram; 524 struct page *page; 525 /* The read bio for backing device */ 526 struct bio *bio; 527 unsigned long blk_idx; 528 union { 529 /* The original bio to complete (async read) */ 530 struct bio *parent; 531 /* error status (sync read) */ 532 int error; 533 }; 534 u32 index; 535 }; 536 537 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 538 static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr, 539 char *buf) 540 { 541 struct zram *zram = dev_to_zram(dev); 542 ssize_t ret; 543 544 guard(rwsem_read)(&zram->dev_lock); 545 ret = sysfs_emit(buf, 546 "%8llu %8llu %8llu\n", 547 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 
548 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), 549 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); 550 551 return ret; 552 } 553 554 static ssize_t compressed_writeback_store(struct device *dev, 555 struct device_attribute *attr, 556 const char *buf, size_t len) 557 { 558 struct zram *zram = dev_to_zram(dev); 559 bool val; 560 561 if (kstrtobool(buf, &val)) 562 return -EINVAL; 563 564 guard(rwsem_write)(&zram->dev_lock); 565 if (init_done(zram)) { 566 return -EBUSY; 567 } 568 569 zram->compressed_wb = val; 570 571 return len; 572 } 573 574 static ssize_t compressed_writeback_show(struct device *dev, 575 struct device_attribute *attr, 576 char *buf) 577 { 578 bool val; 579 struct zram *zram = dev_to_zram(dev); 580 581 guard(rwsem_read)(&zram->dev_lock); 582 val = zram->compressed_wb; 583 584 return sysfs_emit(buf, "%d\n", val); 585 } 586 587 static ssize_t writeback_limit_enable_store(struct device *dev, 588 struct device_attribute *attr, 589 const char *buf, size_t len) 590 { 591 struct zram *zram = dev_to_zram(dev); 592 u64 val; 593 594 if (kstrtoull(buf, 10, &val)) 595 return -EINVAL; 596 597 guard(rwsem_write)(&zram->dev_lock); 598 zram->wb_limit_enable = val; 599 600 return len; 601 } 602 603 static ssize_t writeback_limit_enable_show(struct device *dev, 604 struct device_attribute *attr, 605 char *buf) 606 { 607 bool val; 608 struct zram *zram = dev_to_zram(dev); 609 610 guard(rwsem_read)(&zram->dev_lock); 611 val = zram->wb_limit_enable; 612 613 return sysfs_emit(buf, "%d\n", val); 614 } 615 616 static ssize_t writeback_limit_store(struct device *dev, 617 struct device_attribute *attr, 618 const char *buf, size_t len) 619 { 620 struct zram *zram = dev_to_zram(dev); 621 u64 val; 622 623 if (kstrtoull(buf, 10, &val)) 624 return -EINVAL; 625 626 /* 627 * When the page size is greater than 4KB, if bd_wb_limit is set to 628 * a value that is not page - size aligned, it will cause value 629 * wrapping. 
For example, when the page size is set to 16KB and 630 * bd_wb_limit is set to 3, a single write - back operation will 631 * cause bd_wb_limit to become -1. Even more terrifying is that 632 * bd_wb_limit is an unsigned number. 633 */ 634 val = rounddown(val, PAGE_SIZE / 4096); 635 636 guard(rwsem_write)(&zram->dev_lock); 637 zram->bd_wb_limit = val; 638 639 return len; 640 } 641 642 static ssize_t writeback_limit_show(struct device *dev, 643 struct device_attribute *attr, char *buf) 644 { 645 u64 val; 646 struct zram *zram = dev_to_zram(dev); 647 648 guard(rwsem_read)(&zram->dev_lock); 649 val = zram->bd_wb_limit; 650 651 return sysfs_emit(buf, "%llu\n", val); 652 } 653 654 static ssize_t writeback_batch_size_store(struct device *dev, 655 struct device_attribute *attr, 656 const char *buf, size_t len) 657 { 658 struct zram *zram = dev_to_zram(dev); 659 u32 val; 660 661 if (kstrtouint(buf, 10, &val)) 662 return -EINVAL; 663 664 if (!val) 665 return -EINVAL; 666 667 guard(rwsem_write)(&zram->dev_lock); 668 zram->wb_batch_size = val; 669 670 return len; 671 } 672 673 static ssize_t writeback_batch_size_show(struct device *dev, 674 struct device_attribute *attr, 675 char *buf) 676 { 677 u32 val; 678 struct zram *zram = dev_to_zram(dev); 679 680 guard(rwsem_read)(&zram->dev_lock); 681 val = zram->wb_batch_size; 682 683 return sysfs_emit(buf, "%u\n", val); 684 } 685 686 static void reset_bdev(struct zram *zram) 687 { 688 if (!zram->backing_dev) 689 return; 690 691 /* hope filp_close flush all of IO */ 692 filp_close(zram->backing_dev, NULL); 693 zram->backing_dev = NULL; 694 zram->bdev = NULL; 695 zram->disk->fops = &zram_devops; 696 kvfree(zram->bitmap); 697 zram->bitmap = NULL; 698 } 699 700 static ssize_t backing_dev_show(struct device *dev, 701 struct device_attribute *attr, char *buf) 702 { 703 struct file *file; 704 struct zram *zram = dev_to_zram(dev); 705 char *p; 706 ssize_t ret; 707 708 guard(rwsem_read)(&zram->dev_lock); 709 file = zram->backing_dev; 710 if 
(!file) { 711 memcpy(buf, "none\n", 5); 712 return 5; 713 } 714 715 p = file_path(file, buf, PAGE_SIZE - 1); 716 if (IS_ERR(p)) 717 return PTR_ERR(p); 718 719 ret = strlen(p); 720 memmove(buf, p, ret); 721 buf[ret++] = '\n'; 722 return ret; 723 } 724 725 static ssize_t backing_dev_store(struct device *dev, 726 struct device_attribute *attr, const char *buf, 727 size_t len) 728 { 729 char *file_name; 730 size_t sz; 731 struct file *backing_dev = NULL; 732 struct inode *inode; 733 unsigned int bitmap_sz; 734 unsigned long nr_pages, *bitmap = NULL; 735 int err; 736 struct zram *zram = dev_to_zram(dev); 737 738 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 739 if (!file_name) 740 return -ENOMEM; 741 742 guard(rwsem_write)(&zram->dev_lock); 743 if (init_done(zram)) { 744 pr_info("Can't setup backing device for initialized device\n"); 745 err = -EBUSY; 746 goto out; 747 } 748 749 strscpy(file_name, buf, PATH_MAX); 750 /* ignore trailing newline */ 751 sz = strlen(file_name); 752 if (sz > 0 && file_name[sz - 1] == '\n') 753 file_name[sz - 1] = 0x00; 754 755 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0); 756 if (IS_ERR(backing_dev)) { 757 err = PTR_ERR(backing_dev); 758 backing_dev = NULL; 759 goto out; 760 } 761 762 inode = backing_dev->f_mapping->host; 763 764 /* Support only block device in this moment */ 765 if (!S_ISBLK(inode->i_mode)) { 766 err = -ENOTBLK; 767 goto out; 768 } 769 770 nr_pages = i_size_read(inode) >> PAGE_SHIFT; 771 /* Refuse to use zero sized device (also prevents self reference) */ 772 if (!nr_pages) { 773 err = -EINVAL; 774 goto out; 775 } 776 777 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 778 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 779 if (!bitmap) { 780 err = -ENOMEM; 781 goto out; 782 } 783 784 reset_bdev(zram); 785 786 zram->bdev = I_BDEV(inode); 787 zram->backing_dev = backing_dev; 788 zram->bitmap = bitmap; 789 zram->nr_pages = nr_pages; 790 791 pr_info("setup backing device %s\n", file_name); 792 
kfree(file_name); 793 794 return len; 795 out: 796 kvfree(bitmap); 797 798 if (backing_dev) 799 filp_close(backing_dev, NULL); 800 801 kfree(file_name); 802 803 return err; 804 } 805 806 static unsigned long zram_reserve_bdev_block(struct zram *zram) 807 { 808 unsigned long blk_idx; 809 810 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0); 811 if (blk_idx == zram->nr_pages) 812 return INVALID_BDEV_BLOCK; 813 814 set_bit(blk_idx, zram->bitmap); 815 atomic64_inc(&zram->stats.bd_count); 816 return blk_idx; 817 } 818 819 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) 820 { 821 int was_set; 822 823 was_set = test_and_clear_bit(blk_idx, zram->bitmap); 824 WARN_ON_ONCE(!was_set); 825 atomic64_dec(&zram->stats.bd_count); 826 } 827 828 static void release_wb_req(struct zram_wb_req *req) 829 { 830 __free_page(req->page); 831 kfree(req); 832 } 833 834 static void release_wb_ctl(struct zram_wb_ctl *wb_ctl) 835 { 836 if (!wb_ctl) 837 return; 838 839 /* We should never have inflight requests at this point */ 840 WARN_ON(atomic_read(&wb_ctl->num_inflight)); 841 WARN_ON(!list_empty(&wb_ctl->done_reqs)); 842 843 while (!list_empty(&wb_ctl->idle_reqs)) { 844 struct zram_wb_req *req; 845 846 req = list_first_entry(&wb_ctl->idle_reqs, 847 struct zram_wb_req, entry); 848 list_del(&req->entry); 849 release_wb_req(req); 850 } 851 852 kfree_rcu(wb_ctl, rcu); 853 } 854 855 static struct zram_wb_ctl *init_wb_ctl(struct zram *zram) 856 { 857 struct zram_wb_ctl *wb_ctl; 858 int i; 859 860 wb_ctl = kmalloc_obj(*wb_ctl); 861 if (!wb_ctl) 862 return NULL; 863 864 INIT_LIST_HEAD(&wb_ctl->idle_reqs); 865 INIT_LIST_HEAD(&wb_ctl->done_reqs); 866 atomic_set(&wb_ctl->num_inflight, 0); 867 init_waitqueue_head(&wb_ctl->done_wait); 868 spin_lock_init(&wb_ctl->done_lock); 869 870 for (i = 0; i < zram->wb_batch_size; i++) { 871 struct zram_wb_req *req; 872 873 /* 874 * This is fatal condition only if we couldn't allocate 875 * any requests at all. 
Otherwise we just work with the 876 * requests that we have successfully allocated, so that 877 * writeback can still proceed, even if there is only one 878 * request on the idle list. 879 */ 880 req = kzalloc_obj(*req, GFP_KERNEL | __GFP_NOWARN); 881 if (!req) 882 break; 883 884 req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN); 885 if (!req->page) { 886 kfree(req); 887 break; 888 } 889 890 list_add(&req->entry, &wb_ctl->idle_reqs); 891 } 892 893 /* We couldn't allocate any requests, so writeabck is not possible */ 894 if (list_empty(&wb_ctl->idle_reqs)) 895 goto release_wb_ctl; 896 897 return wb_ctl; 898 899 release_wb_ctl: 900 release_wb_ctl(wb_ctl); 901 return NULL; 902 } 903 904 static void zram_account_writeback_rollback(struct zram *zram) 905 { 906 lockdep_assert_held_write(&zram->dev_lock); 907 908 if (zram->wb_limit_enable) 909 zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12); 910 } 911 912 static void zram_account_writeback_submit(struct zram *zram) 913 { 914 lockdep_assert_held_write(&zram->dev_lock); 915 916 if (zram->wb_limit_enable && zram->bd_wb_limit > 0) 917 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); 918 } 919 920 static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) 921 { 922 u32 index = req->pps->index; 923 int err; 924 925 err = blk_status_to_errno(req->bio.bi_status); 926 if (err) { 927 /* 928 * Failed wb requests should not be accounted in wb_limit 929 * (if enabled). 930 */ 931 zram_account_writeback_rollback(zram); 932 zram_release_bdev_block(zram, req->blk_idx); 933 return err; 934 } 935 936 atomic64_inc(&zram->stats.bd_writes); 937 slot_lock(zram, index); 938 /* 939 * We release slot lock during writeback so slot can change under us: 940 * slot_free() or slot_free() and zram_write_page(). In both cases 941 * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can 942 * set ZRAM_PP_SLOT on such slots until current post-processing 943 * finishes. 
944 */ 945 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) { 946 zram_release_bdev_block(zram, req->blk_idx); 947 goto out; 948 } 949 950 clear_slot_flag(zram, index, ZRAM_IDLE); 951 if (test_slot_flag(zram, index, ZRAM_HUGE)) 952 atomic64_dec(&zram->stats.huge_pages); 953 atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size); 954 zs_free(zram->mem_pool, get_slot_handle(zram, index)); 955 set_slot_handle(zram, index, req->blk_idx); 956 set_slot_flag(zram, index, ZRAM_WB); 957 958 out: 959 slot_unlock(zram, index); 960 return 0; 961 } 962 963 static void zram_writeback_endio(struct bio *bio) 964 { 965 struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio); 966 struct zram_wb_ctl *wb_ctl = bio->bi_private; 967 unsigned long flags; 968 969 rcu_read_lock(); 970 spin_lock_irqsave(&wb_ctl->done_lock, flags); 971 list_add(&req->entry, &wb_ctl->done_reqs); 972 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 973 974 wake_up(&wb_ctl->done_wait); 975 rcu_read_unlock(); 976 } 977 978 static void zram_submit_wb_request(struct zram *zram, 979 struct zram_wb_ctl *wb_ctl, 980 struct zram_wb_req *req) 981 { 982 /* 983 * wb_limit (if enabled) should be adjusted before submission, 984 * so that we don't over-submit. 
985 */ 986 zram_account_writeback_submit(zram); 987 atomic_inc(&wb_ctl->num_inflight); 988 req->bio.bi_private = wb_ctl; 989 submit_bio(&req->bio); 990 } 991 992 static int zram_complete_done_reqs(struct zram *zram, 993 struct zram_wb_ctl *wb_ctl) 994 { 995 struct zram_wb_req *req; 996 unsigned long flags; 997 int ret = 0, err; 998 999 while (atomic_read(&wb_ctl->num_inflight) > 0) { 1000 spin_lock_irqsave(&wb_ctl->done_lock, flags); 1001 req = list_first_entry_or_null(&wb_ctl->done_reqs, 1002 struct zram_wb_req, entry); 1003 if (req) 1004 list_del(&req->entry); 1005 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 1006 1007 /* ->num_inflight > 0 doesn't mean we have done requests */ 1008 if (!req) 1009 break; 1010 1011 err = zram_writeback_complete(zram, req); 1012 if (err) 1013 ret = err; 1014 1015 atomic_dec(&wb_ctl->num_inflight); 1016 release_pp_slot(zram, req->pps); 1017 req->pps = NULL; 1018 1019 list_add(&req->entry, &wb_ctl->idle_reqs); 1020 } 1021 1022 return ret; 1023 } 1024 1025 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl) 1026 { 1027 struct zram_wb_req *req; 1028 1029 req = list_first_entry_or_null(&wb_ctl->idle_reqs, 1030 struct zram_wb_req, entry); 1031 if (req) 1032 list_del(&req->entry); 1033 return req; 1034 } 1035 1036 static int zram_writeback_slots(struct zram *zram, 1037 struct zram_pp_ctl *ctl, 1038 struct zram_wb_ctl *wb_ctl) 1039 { 1040 unsigned long blk_idx = INVALID_BDEV_BLOCK; 1041 struct zram_wb_req *req = NULL; 1042 struct zram_pp_slot *pps; 1043 int ret = 0, err = 0; 1044 u32 index = 0; 1045 1046 while ((pps = select_pp_slot(ctl))) { 1047 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 1048 ret = -EIO; 1049 break; 1050 } 1051 1052 while (!req) { 1053 req = zram_select_idle_req(wb_ctl); 1054 if (req) 1055 break; 1056 1057 wait_event(wb_ctl->done_wait, 1058 !list_empty(&wb_ctl->done_reqs)); 1059 1060 err = zram_complete_done_reqs(zram, wb_ctl); 1061 /* 1062 * BIO errors are not fatal, we continue and 
simply 1063 * attempt to writeback the remaining objects (pages). 1064 * At the same time we need to signal user-space that 1065 * some writes (at least one, but also could be all of 1066 * them) were not successful and we do so by returning 1067 * the most recent BIO error. 1068 */ 1069 if (err) 1070 ret = err; 1071 } 1072 1073 if (blk_idx == INVALID_BDEV_BLOCK) { 1074 blk_idx = zram_reserve_bdev_block(zram); 1075 if (blk_idx == INVALID_BDEV_BLOCK) { 1076 ret = -ENOSPC; 1077 break; 1078 } 1079 } 1080 1081 index = pps->index; 1082 slot_lock(zram, index); 1083 /* 1084 * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so 1085 * slots can change in the meantime. If slots are accessed or 1086 * freed they lose ZRAM_PP_SLOT flag and hence we don't 1087 * post-process them. 1088 */ 1089 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) 1090 goto next; 1091 if (zram->compressed_wb) 1092 err = read_from_zspool_raw(zram, req->page, index); 1093 else 1094 err = read_from_zspool(zram, req->page, index); 1095 if (err) 1096 goto next; 1097 slot_unlock(zram, index); 1098 1099 /* 1100 * From now on pp-slot is owned by the req, remove it from 1101 * its pp bucket. 1102 */ 1103 list_del_init(&pps->entry); 1104 1105 req->blk_idx = blk_idx; 1106 req->pps = pps; 1107 bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE); 1108 req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1109 req->bio.bi_end_io = zram_writeback_endio; 1110 __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0); 1111 1112 zram_submit_wb_request(zram, wb_ctl, req); 1113 blk_idx = INVALID_BDEV_BLOCK; 1114 req = NULL; 1115 cond_resched(); 1116 continue; 1117 1118 next: 1119 slot_unlock(zram, index); 1120 release_pp_slot(zram, pps); 1121 } 1122 1123 /* 1124 * Selected idle req, but never submitted it due to some error or 1125 * wb limit. 
1126 */ 1127 if (req) 1128 release_wb_req(req); 1129 1130 if (blk_idx != INVALID_BDEV_BLOCK) 1131 zram_release_bdev_block(zram, blk_idx); 1132 1133 while (atomic_read(&wb_ctl->num_inflight) > 0) { 1134 wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs)); 1135 err = zram_complete_done_reqs(zram, wb_ctl); 1136 if (err) 1137 ret = err; 1138 } 1139 1140 return ret; 1141 } 1142 1143 #define PAGE_WRITEBACK 0 1144 #define HUGE_WRITEBACK (1 << 0) 1145 #define IDLE_WRITEBACK (1 << 1) 1146 #define INCOMPRESSIBLE_WRITEBACK (1 << 2) 1147 1148 static int parse_page_index(char *val, unsigned long nr_pages, 1149 unsigned long *lo, unsigned long *hi) 1150 { 1151 int ret; 1152 1153 ret = kstrtoul(val, 10, lo); 1154 if (ret) 1155 return ret; 1156 if (*lo >= nr_pages) 1157 return -ERANGE; 1158 *hi = *lo + 1; 1159 return 0; 1160 } 1161 1162 static int parse_page_indexes(char *val, unsigned long nr_pages, 1163 unsigned long *lo, unsigned long *hi) 1164 { 1165 char *delim; 1166 int ret; 1167 1168 delim = strchr(val, '-'); 1169 if (!delim) 1170 return -EINVAL; 1171 1172 *delim = 0x00; 1173 ret = kstrtoul(val, 10, lo); 1174 if (ret) 1175 return ret; 1176 if (*lo >= nr_pages) 1177 return -ERANGE; 1178 1179 ret = kstrtoul(delim + 1, 10, hi); 1180 if (ret) 1181 return ret; 1182 if (*hi >= nr_pages || *lo > *hi) 1183 return -ERANGE; 1184 *hi += 1; 1185 return 0; 1186 } 1187 1188 static int parse_mode(char *val, u32 *mode) 1189 { 1190 *mode = 0; 1191 1192 if (!strcmp(val, "idle")) 1193 *mode = IDLE_WRITEBACK; 1194 if (!strcmp(val, "huge")) 1195 *mode = HUGE_WRITEBACK; 1196 if (!strcmp(val, "huge_idle")) 1197 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK; 1198 if (!strcmp(val, "incompressible")) 1199 *mode = INCOMPRESSIBLE_WRITEBACK; 1200 1201 if (*mode == 0) 1202 return -EINVAL; 1203 return 0; 1204 } 1205 1206 static void scan_slots_for_writeback(struct zram *zram, u32 mode, 1207 unsigned long lo, unsigned long hi, 1208 struct zram_pp_ctl *ctl) 1209 { 1210 u32 index = lo; 1211 1212 while 
(index < hi) { 1213 bool ok = true; 1214 1215 slot_lock(zram, index); 1216 if (!slot_allocated(zram, index)) 1217 goto next; 1218 1219 if (test_slot_flag(zram, index, ZRAM_WB) || 1220 test_slot_flag(zram, index, ZRAM_SAME)) 1221 goto next; 1222 1223 if (mode & IDLE_WRITEBACK && 1224 !test_slot_flag(zram, index, ZRAM_IDLE)) 1225 goto next; 1226 if (mode & HUGE_WRITEBACK && 1227 !test_slot_flag(zram, index, ZRAM_HUGE)) 1228 goto next; 1229 if (mode & INCOMPRESSIBLE_WRITEBACK && 1230 !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 1231 goto next; 1232 1233 ok = place_pp_slot(zram, ctl, index); 1234 next: 1235 slot_unlock(zram, index); 1236 if (!ok) 1237 break; 1238 index++; 1239 } 1240 } 1241 1242 static ssize_t writeback_store(struct device *dev, 1243 struct device_attribute *attr, 1244 const char *buf, size_t len) 1245 { 1246 struct zram *zram = dev_to_zram(dev); 1247 u64 nr_pages = zram->disksize >> PAGE_SHIFT; 1248 unsigned long lo = 0, hi = nr_pages; 1249 struct zram_pp_ctl *pp_ctl = NULL; 1250 struct zram_wb_ctl *wb_ctl = NULL; 1251 char *args, *param, *val; 1252 ssize_t ret = len; 1253 int err, mode = 0; 1254 1255 guard(rwsem_write)(&zram->dev_lock); 1256 if (!init_done(zram)) 1257 return -EINVAL; 1258 1259 if (!zram->backing_dev) 1260 return -ENODEV; 1261 1262 pp_ctl = init_pp_ctl(); 1263 if (!pp_ctl) 1264 return -ENOMEM; 1265 1266 wb_ctl = init_wb_ctl(zram); 1267 if (!wb_ctl) { 1268 ret = -ENOMEM; 1269 goto out; 1270 } 1271 1272 args = skip_spaces(buf); 1273 while (*args) { 1274 args = next_arg(args, ¶m, &val); 1275 1276 /* 1277 * Workaround to support the old writeback interface. 1278 * 1279 * The old writeback interface has a minor inconsistency and 1280 * requires key=value only for page_index parameter, while the 1281 * writeback mode is a valueless parameter. 
1282 * 1283 * This is not the case anymore and now all parameters are 1284 * required to have values, however, we need to support the 1285 * legacy writeback interface format so we check if we can 1286 * recognize a valueless parameter as the (legacy) writeback 1287 * mode. 1288 */ 1289 if (!val || !*val) { 1290 err = parse_mode(param, &mode); 1291 if (err) { 1292 ret = err; 1293 goto out; 1294 } 1295 1296 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1297 break; 1298 } 1299 1300 if (!strcmp(param, "type")) { 1301 err = parse_mode(val, &mode); 1302 if (err) { 1303 ret = err; 1304 goto out; 1305 } 1306 1307 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1308 break; 1309 } 1310 1311 if (!strcmp(param, "page_index")) { 1312 err = parse_page_index(val, nr_pages, &lo, &hi); 1313 if (err) { 1314 ret = err; 1315 goto out; 1316 } 1317 1318 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1319 continue; 1320 } 1321 1322 if (!strcmp(param, "page_indexes")) { 1323 err = parse_page_indexes(val, nr_pages, &lo, &hi); 1324 if (err) { 1325 ret = err; 1326 goto out; 1327 } 1328 1329 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1330 continue; 1331 } 1332 } 1333 1334 err = zram_writeback_slots(zram, pp_ctl, wb_ctl); 1335 if (err) 1336 ret = err; 1337 1338 out: 1339 release_pp_ctl(zram, pp_ctl); 1340 release_wb_ctl(wb_ctl); 1341 1342 return ret; 1343 } 1344 1345 static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index) 1346 { 1347 struct zcomp_strm *zstrm; 1348 unsigned int size; 1349 int ret, prio; 1350 void *src; 1351 1352 slot_lock(zram, index); 1353 /* Since slot was unlocked we need to make sure it's still ZRAM_WB */ 1354 if (!test_slot_flag(zram, index, ZRAM_WB)) { 1355 slot_unlock(zram, index); 1356 /* We read some stale data, zero it out */ 1357 memset_page(page, 0, 0, PAGE_SIZE); 1358 return -EIO; 1359 } 1360 1361 if (test_slot_flag(zram, index, ZRAM_HUGE)) { 1362 slot_unlock(zram, index); 1363 return 0; 1364 } 1365 1366 
size = get_slot_size(zram, index); 1367 prio = get_slot_comp_priority(zram, index); 1368 1369 zstrm = zcomp_stream_get(zram->comps[prio]); 1370 src = kmap_local_page(page); 1371 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, 1372 zstrm->local_copy); 1373 if (!ret) 1374 copy_page(src, zstrm->local_copy); 1375 kunmap_local(src); 1376 zcomp_stream_put(zstrm); 1377 slot_unlock(zram, index); 1378 1379 return ret; 1380 } 1381 1382 static void zram_deferred_decompress(struct work_struct *w) 1383 { 1384 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1385 struct page *page = bio_first_page_all(req->bio); 1386 struct zram *zram = req->zram; 1387 u32 index = req->index; 1388 int ret; 1389 1390 ret = decompress_bdev_page(zram, page, index); 1391 if (ret) 1392 req->parent->bi_status = BLK_STS_IOERR; 1393 1394 /* Decrement parent's ->remaining */ 1395 bio_endio(req->parent); 1396 bio_put(req->bio); 1397 kfree(req); 1398 } 1399 1400 static void zram_async_read_endio(struct bio *bio) 1401 { 1402 struct zram_rb_req *req = bio->bi_private; 1403 struct zram *zram = req->zram; 1404 1405 if (bio->bi_status) { 1406 req->parent->bi_status = bio->bi_status; 1407 bio_endio(req->parent); 1408 bio_put(bio); 1409 kfree(req); 1410 return; 1411 } 1412 1413 /* 1414 * NOTE: zram_async_read_endio() is not exactly right place for this. 1415 * Ideally, we need to do it after ZRAM_WB check, but this requires 1416 * us to use wq path even on systems that don't enable compressed 1417 * writeback, because we cannot take slot-lock in the current context. 1418 * 1419 * Keep the existing behavior for now. 1420 */ 1421 if (zram->compressed_wb == false) { 1422 /* No decompression needed, complete the parent IO */ 1423 bio_endio(req->parent); 1424 bio_put(bio); 1425 kfree(req); 1426 return; 1427 } 1428 1429 /* 1430 * zram decompression is sleepable, so we need to deffer it to 1431 * a preemptible context. 
1432 */ 1433 INIT_WORK(&req->work, zram_deferred_decompress); 1434 queue_work(system_highpri_wq, &req->work); 1435 } 1436 1437 static int read_from_bdev_async(struct zram *zram, struct page *page, 1438 u32 index, unsigned long blk_idx, 1439 struct bio *parent) 1440 { 1441 struct zram_rb_req *req; 1442 struct bio *bio; 1443 1444 req = kmalloc_obj(*req, GFP_NOIO); 1445 if (!req) 1446 return -ENOMEM; 1447 1448 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); 1449 if (!bio) { 1450 kfree(req); 1451 return -ENOMEM; 1452 } 1453 1454 req->zram = zram; 1455 req->index = index; 1456 req->blk_idx = blk_idx; 1457 req->bio = bio; 1458 req->parent = parent; 1459 1460 bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 1461 bio->bi_private = req; 1462 bio->bi_end_io = zram_async_read_endio; 1463 1464 __bio_add_page(bio, page, PAGE_SIZE, 0); 1465 bio_inc_remaining(parent); 1466 submit_bio(bio); 1467 1468 return 0; 1469 } 1470 1471 static void zram_sync_read(struct work_struct *w) 1472 { 1473 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1474 struct bio_vec bv; 1475 struct bio bio; 1476 1477 bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ); 1478 bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1479 __bio_add_page(&bio, req->page, PAGE_SIZE, 0); 1480 req->error = submit_bio_wait(&bio); 1481 } 1482 1483 /* 1484 * Block layer want one ->submit_bio to be active at a time, so if we use 1485 * chained IO with parent IO in same context, it's a deadlock. To avoid that, 1486 * use a worker thread context. 
 */
static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index,
			       unsigned long blk_idx)
{
	struct zram_rb_req req;

	req.page = page;
	req.zram = zram;
	req.blk_idx = blk_idx;

	/* Run the read on a workqueue and wait for it to finish */
	INIT_WORK_ONSTACK(&req.work, zram_sync_read);
	queue_work(system_dfl_wq, &req.work);
	flush_work(&req.work);
	destroy_work_on_stack(&req.work);

	/* On a read error, or without compressed writeback, there is nothing to decompress */
	if (req.error || zram->compressed_wb == false)
		return req.error;

	return decompress_bdev_page(zram, page, index);
}

/*
 * Read backing device block @blk_idx into @page and account the read in
 * stats.bd_reads.
 *
 * With a @parent bio the read is asynchronous and chained to it. Without
 * one the read is synchronous; that path warns and returns -EIO when
 * IS_ENABLED(ZRAM_PARTIAL_IO) is false.
 */
static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
			  unsigned long blk_idx, struct bio *parent)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (!parent) {
		if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
			return -EIO;
		return read_from_bdev_sync(zram, page, index, blk_idx);
	}
	return read_from_bdev_async(zram, page, index, blk_idx, parent);
}
#else
/*
 * Stubs for the configuration in which the code above is compiled out.
 * NOTE(review): the opening #if is outside this view - presumably
 * CONFIG_ZRAM_WRITEBACK; confirm.
 */
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
			  unsigned long blk_idx, struct bio *parent)
{
	return -EIO;
}

static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
{
}
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING

/* Top-level "zram" debugfs directory shared by all devices */
static struct dentry *zram_debugfs_root;

/* Create the top-level "zram" debugfs directory */
static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

/* Remove the top-level debugfs directory and everything below it */
static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

/*
 * ->read handler of the per-device "block_state" debugfs file.
 *
 * Emits one text line per allocated slot, starting at slot *ppos:
 * the slot index, the slot's ac_time followed by a constant ".000000",
 * and six flag characters - 's' ZRAM_SAME, 'w' ZRAM_WB, 'h' ZRAM_HUGE,
 * 'i' ZRAM_IDLE, 'r' non-zero compression priority, 'n'
 * ZRAM_INCOMPRESSIBLE - with '.' standing for "not set".
 *
 * Note that *ppos counts slots, not bytes: it is advanced by one for every
 * slot that has been processed. Output stops when the next line does not
 * fit into the remaining @count bytes.
 *
 * Returns the number of bytes copied to @buf, -ENOMEM when the bounce
 * buffer cannot be allocated, -EINVAL when the device is not initialized,
 * or -EFAULT when copy_to_user() fails.
 */
static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	guard(rwsem_read)(&zram->dev_lock);
	if (!init_done(zram)) {
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		slot_lock(zram, index);
		if (!slot_allocated(zram, index))
			goto next;

		copied = snprintf(kbuf + written, count,
			"%12zd %12u.%06d %c%c%c%c%c%c\n",
			index, zram->table[index].attr.ac_time, 0,
			test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
			get_slot_comp_priority(zram, index) ? 'r' : '.',
			test_slot_flag(zram, index,
				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');

		/*
		 * snprintf() returns the length the full line needs; if that
		 * does not fit, stop here without advancing *ppos so the slot
		 * is reported by the next read.
		 */
		if (count <= copied) {
			slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		slot_unlock(zram, index);
		*ppos += 1;
	}

	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}

/* File operations of the per-device "block_state" debugfs file */
static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

/*
 * Create the per-device debugfs directory (named after the disk) and its
 * "block_state" file. Does nothing when the top-level directory is not set.
 */
static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

/* Remove the per-device debugfs directory and its content */
static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
/* No-op variants used without CONFIG_ZRAM_MEMORY_TRACKING */
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif

/* Only algo parameter given,
lookup by algo name */
static int lookup_algo_priority(struct zram *zram, const char *algo,
				u32 min_prio)
{
	s32 prio;

	/* First configured priority >= min_prio whose name equals @algo wins */
	for (prio = min_prio; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		if (!strcmp(zram->comp_algs[prio], algo))
			return prio;
	}

	return -EINVAL;
}

/* Both algo and priority parameters given, validate them */
static int validate_algo_priority(struct zram *zram, const char *algo, u32 prio)
{
	if (prio >= ZRAM_MAX_COMPS)
		return -EINVAL;
	/* No algo at given priority */
	if (!zram->comp_algs[prio])
		return -EINVAL;
	/* A different algo at given priority */
	if (strcmp(zram->comp_algs[prio], algo))
		return -EINVAL;
	return 0;
}

/* Record @alg as the algorithm name for priority @prio; no validation */
static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	zram->comp_algs[prio] = alg;
}

/*
 * Common part of the comp_algorithm and recomp_algorithm store handlers.
 *
 * Returns 0 on success, -E2BIG when @buf is ZRAM_MAX_ALGO_NAME_SZ
 * characters or longer, -EINVAL when zcomp_lookup_backend_name() does not
 * know the name, or -EBUSY when the device is already initialized.
 */
static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	const char *alg;
	size_t sz;

	sz = strlen(buf);
	if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
		return -E2BIG;

	/* Store the name returned by the backend lookup rather than @buf */
	alg = zcomp_lookup_backend_name(buf);
	if (!alg)
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, alg);
	return 0;
}

/*
 * Free the dictionary of priority @prio and put its parameters back to the
 * "not set" state.
 */
static void comp_params_reset(struct zram *zram, u32 prio)
{
	struct zcomp_params *params = &zram->params[prio];

	vfree(params->dict);
	params->level = ZCOMP_PARAM_NOT_SET;
	params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
	params->dict_sz = 0;
	params->dict = NULL;
}

/*
 * Replace the parameters of priority @prio.
 *
 * The previous parameters are reset first. When @dict_path is given, the
 * dictionary is loaded from that file. Returns 0 on success; any failure to
 * read the dictionary is reported as -EINVAL and leaves the parameters in
 * the reset state.
 */
static int comp_params_store(struct zram *zram, u32 prio, s32 level,
			     const char *dict_path,
			     struct deflate_params *deflate_params)
{
	ssize_t sz = 0;

	comp_params_reset(zram, prio);

	if (dict_path) {
		sz = kernel_read_file_from_path(dict_path, 0,
						&zram->params[prio].dict,
						INT_MAX,
						NULL,
						READING_POLICY);
		if (sz < 0)
			return -EINVAL;
	}

	zram->params[prio].dict_sz = sz;
	zram->params[prio].level = level;
	zram->params[prio].deflate.winbits = deflate_params->winbits;
	return 0;
}

/*
 * sysfs store handler for the algorithm_params attribute.
 *
 * Accepts key=value pairs: priority=, level=, algo=, dict= and
 * deflate.winbits=. Every parameter must have a value; pairs with any other
 * key are skipped.
 *
 * The target priority is resolved as follows:
 *  - priority= alone selects that priority;
 *  - algo= alone selects the first priority configured with that algorithm;
 *  - both given: the algorithm configured at that priority must match;
 *  - neither given: ZRAM_PRIMARY_COMP.
 *
 * Returns @len on success, -EINVAL for a valueless parameter, an
 * out-of-range priority or an algorithm mismatch, the kstrtoint() error for
 * a malformed number, -EBUSY when the device is already initialized, or the
 * comp_params_store() error.
 */
static ssize_t algorithm_params_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
	char *args, *param, *val, *algo = NULL, *dict_path = NULL;
	struct deflate_params deflate_params;
	struct zram *zram = dev_to_zram(dev);
	bool prio_param = false;
	int ret;

	deflate_params.winbits = ZCOMP_PARAM_NOT_SET;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "priority")) {
			prio_param = true;
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "level")) {
			ret = kstrtoint(val, 10, &level);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "dict")) {
			dict_path = val;
			continue;
		}

		if (!strcmp(param, "deflate.winbits")) {
			ret = kstrtoint(val, 10, &deflate_params.winbits);
			if (ret)
				return ret;
			continue;
		}
	}

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram))
		return -EBUSY;

	if (prio_param) {
		if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
			return -EINVAL;
	}

	if (algo && prio_param) {
		ret = validate_algo_priority(zram, algo, prio);
		if (ret)
			return ret;
	}

	if (algo && !prio_param) {
		prio = lookup_algo_priority(zram, algo, ZRAM_PRIMARY_COMP);
		if (prio < 0)
			return -EINVAL;
	}

	ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
	return ret ? ret : len;
}

/*
 * sysfs show handler for comp_algorithm: output is produced by
 * zcomp_available_show() for the primary algorithm.
 */
static ssize_t comp_algorithm_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz;

	guard(rwsem_read)(&zram->dev_lock);
	sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
	return sz;
}

/*
 * sysfs store handler for comp_algorithm: sets the primary algorithm.
 * Returns @len on success or the __comp_algorithm_store() error.
 */
static ssize_t comp_algorithm_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf,
				    size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
	return ret ? ret : len;
}

#ifdef CONFIG_ZRAM_MULTI_COMP
/*
 * sysfs show handler for recomp_algorithm: for every configured priority
 * from ZRAM_SECONDARY_COMP up, emits "#<prio>: " followed by the
 * zcomp_available_show() output for that priority.
 */
static ssize_t recomp_algorithm_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz = 0;
	u32 prio;

	guard(rwsem_read)(&zram->dev_lock);
	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
		sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
	}
	return sz;
}

/*
 * sysfs store handler for recomp_algorithm.
 *
 * Accepts algo=<name> (mandatory) and priority=<n> (defaults to
 * ZRAM_SECONDARY_COMP); pairs with any other key are skipped.
 *
 * Returns @len on success, -EINVAL for a valueless parameter, a missing
 * algo= or a priority outside [ZRAM_SECONDARY_COMP, ZRAM_MAX_COMPS), the
 * kstrtoint() error for a malformed priority, or the
 * __comp_algorithm_store() error.
 */
static ssize_t recomp_algorithm_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int prio = ZRAM_SECONDARY_COMP;
	char *args, *param, *val;
	char *alg = NULL;
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "algo")) {
			alg = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}
	}

	if (!alg)
		return -EINVAL;

	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = __comp_algorithm_store(zram, prio, alg);
	return ret ? ret : len;
}
#endif

/*
 * sysfs store handler for compact: runs zs_compact() on the device's memory
 * pool. The written value is not inspected. Returns @len, or -EINVAL when
 * the device is not initialized.
 */
static ssize_t compact_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	if (!init_done(zram))
		return -EINVAL;

	zs_compact(zram->mem_pool);

	return len;
}

/*
 * sysfs show handler for io_stat. Columns: failed_reads, failed_writes,
 * a constant 0, notify_free.
 */
static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	ret = sysfs_emit(buf,
			"%8llu %8llu 0 %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.notify_free));

	return ret;
}

/*
 * sysfs show handler for mm_stat. Columns, in order: pages_stored in bytes,
 * compr_data_size, pool pages in bytes, limit_pages in bytes,
 * max_used_pages in bytes, same_pages, pages_compacted, huge_pages,
 * huge_pages_since.
 *
 * The pool-derived values (third and seventh column) are reported as 0 when
 * the device is not initialized.
 */
static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	guard(rwsem_read)(&zram->dev_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = sysfs_emit(buf,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			atomic_long_read(&pool_stats.pages_compacted),
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));

	return ret;
}

/*
 * sysfs show handler for debug_stat: a "version: 1" line followed by a
 * constant 0 and the miss_free counter.
 */
static ssize_t debug_stat_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	ret = sysfs_emit(buf,
			"version: %d\n0 %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.miss_free));

	return ret;
}

/*
 * Free every slot of a device of @disksize bytes, then destroy the memory
 * pool and the slot table. Does nothing when there is no table.
 */
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	if (!zram->table)
		return;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		slot_free(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
	zram->table = NULL;
}

/*
 * Allocate the zeroed slot table and the memory pool for a device of
 * @disksize bytes and initialize the lockdep map of every slot.
 *
 * Returns true on success. On failure nothing stays allocated and
 * zram->table is NULL.
 */
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages, index;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		zram->table = NULL;
		return false;
	}

	/* huge_class_size is file-global: query it once, from the first pool */
	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);

	for (index = 0; index < num_pages; index++)
		slot_lock_init(zram, index);

	return true;
}

/*
 * Release whatever slot @index holds and reset its metadata.
 *
 * The IDLE, INCOMPRESSIBLE, PP_SLOT and HUGE flags and the compression
 * priority are always cleared. After that:
 *  - a ZRAM_WB slot gives its backing device block back;
 *  - a ZRAM_SAME slot only drops the flag and the same_pages count;
 *  - a slot without a handle returns early, leaving pages_stored untouched;
 *  - any other slot frees its zsmalloc object and subtracts its size from
 *    compr_data_size.
 * In all but the early-return case pages_stored is decremented and the
 * handle and size are set to 0.
 */
static void slot_free(struct zram *zram, u32 index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].attr.ac_time = 0;
#endif

	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
	set_slot_comp_priority(zram, index, 0);

	if (test_slot_flag(zram, index, ZRAM_HUGE)) {
		/*
		 * Writeback completion decrements ->huge_pages but keeps
		 * ZRAM_HUGE flag for deferred decompression path.
		 */
		if (!test_slot_flag(zram, index, ZRAM_WB))
			atomic64_dec(&zram->stats.huge_pages);
		clear_slot_flag(zram, index, ZRAM_HUGE);
	}

	if (test_slot_flag(zram, index, ZRAM_WB)) {
		clear_slot_flag(zram, index, ZRAM_WB);
		/* For ZRAM_WB slots the handle holds the backing device block index */
		zram_release_bdev_block(zram, get_slot_handle(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (test_slot_flag(zram, index, ZRAM_SAME)) {
		clear_slot_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = get_slot_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(get_slot_size(zram, index),
		     &zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	set_slot_handle(zram, index, 0);
	set_slot_size(zram, index, 0);
}

/*
 * Fill @page with the fill value kept in the slot's handle field.
 * Always returns 0.
 */
static int read_same_filled_page(struct zram *zram, struct page *page,
				 u32 index)
{
	void *mem;

	mem = kmap_local_page(page);
	zram_fill_page(mem, PAGE_SIZE, get_slot_handle(zram, index));
	kunmap_local(mem);
	return 0;
}

/*
 * Copy a page that is stored uncompressed (PAGE_SIZE object) from the pool
 * into @page. Always returns 0.
 */
static int read_incompressible_page(struct zram *zram, struct page *page,
				    u32 index)
{
	unsigned long handle;
	void *src, *dst;

	handle = get_slot_handle(zram, index);
	src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL);
	dst = kmap_local_page(page);
	copy_page(dst, src);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src);

	return 0;
}

/*
 * Decompress the object of slot @index into @page, using the compressor
 * recorded in the slot's compression priority. Returns the
 * zcomp_decompress() result.
 */
static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	int ret, prio;

	handle = get_slot_handle(zram, index);
	size = get_slot_size(zram, index);
	prio = get_slot_comp_priority(zram, index);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	dst = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	return ret;
}

#if defined CONFIG_ZRAM_WRITEBACK
/*
 * Copy the raw (still compressed) object of slot @index to the start of
 * @page and zero the remainder of the page. Always returns 0.
 */
static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src;

	handle = get_slot_handle(zram, index);
	size = get_slot_size(zram, index);

	/*
	 * We need to get stream just for ->local_copy buffer, in
	 * case if object spans two physical pages. No decompression
	 * takes place here, as we read raw compressed data.
	 */
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	memcpy_to_page(page, 0, src, size);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	memzero_page(page, size, PAGE_SIZE - size);

	return 0;
}
#endif

/*
 * Reads (decompresses if needed) a page from zspool (zsmalloc).
 * Corresponding ZRAM slot should be locked.
 */
static int read_from_zspool(struct zram *zram, struct page *page, u32 index)
{
	/*
	 * ZRAM_SAME slots and slots without a handle take the fill path;
	 * for the latter the fill value is the (zero) handle itself.
	 */
	if (test_slot_flag(zram, index, ZRAM_SAME) ||
	    !get_slot_handle(zram, index))
		return read_same_filled_page(zram, page, index);

	if (!test_slot_flag(zram, index, ZRAM_HUGE))
		return read_compressed_page(zram, page, index);
	else
		return read_incompressible_page(zram, page, index);
}

/*
 * Read slot @index into @page.
 *
 * Slots that are not ZRAM_WB are served from the memory pool under the slot
 * lock. ZRAM_WB slots are read from the backing device block stored in the
 * slot handle, after the slot lock has been dropped; @parent is handed to
 * read_from_bdev() and selects between its asynchronous (non-NULL) and
 * synchronous (NULL) path.
 *
 * Returns the result of the read helper that was used; a negative result
 * triggers a warning and an error message.
 */
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	slot_lock(zram, index);
	if (!test_slot_flag(zram, index, ZRAM_WB)) {
		/* Slot should be locked throughout the function call */
		ret = read_from_zspool(zram, page, index);
		slot_unlock(zram, index);
	} else {
		unsigned long blk_idx = get_slot_handle(zram, index);

		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		slot_unlock(zram, index);
		ret = read_from_bdev(zram, page, index, blk_idx, parent);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

/*
 * Use a temporary buffer to decompress the page, as the decompressor
 * always expects a full page for the output.
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;
	/* No parent bio: a written-back slot is read synchronously */
	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}

/*
 * Read slot @index into @bvec. A partial bvec goes through a bounce page;
 * a full-page bvec is read directly into its page, with @bio as the parent
 * of a possible backing device read.
 */
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_read_partial(zram, bvec, index, offset);
	return zram_read_page(zram, bvec->bv_page, index, bio);
}

/*
 * Store a same-filled page in slot @index: the previous content is freed
 * and only the fill value is kept, in the handle field, together with the
 * ZRAM_SAME flag. Always returns 0.
 */
static int write_same_filled_page(struct zram *zram, unsigned long fill,
				  u32 index)
{
	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_SAME);
	set_slot_handle(zram, index, fill);
	slot_unlock(zram, index);

	atomic64_inc(&zram->stats.same_pages);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

/*
 * Store @page uncompressed, as a PAGE_SIZE object flagged ZRAM_HUGE, in
 * slot @index.
 *
 * Returns 0 on success, the zs_malloc() error when the object cannot be
 * allocated, or -ENOMEM when zram_can_store_page() refuses the page (the
 * fresh object is freed again in that case).
 */
static int write_incompressible_page(struct zram *zram, struct page *page,
				     u32 index)
{
	unsigned long handle;
	void *src;

	/*
	 * This function is called from preemptible context so we don't need
	 * to do optimistic and fallback to pessimistic handle allocation,
	 * like we do for compressible pages.
	 */
	handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle))
		return PTR_ERR((void *)handle);

	if (!zram_can_store_page(zram)) {
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	src = kmap_local_page(page);
	zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
	kunmap_local(src);

	/* The object is fully written before the slot is switched over to it */
	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_HUGE);
	set_slot_handle(zram, index, handle);
	set_slot_size(zram, index, PAGE_SIZE);
	slot_unlock(zram, index);

	atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.huge_pages);
	atomic64_inc(&zram->stats.huge_pages_since);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

/*
 * Store @page in slot @index.
 *
 * Same-filled pages are stored without a pool object. Everything else is
 * compressed with the primary compressor; a result of huge_class_size bytes
 * or more is stored uncompressed via write_incompressible_page(), a smaller
 * one is written to a new pool object that then replaces the slot's
 * previous content.
 *
 * Returns 0 on success, the zcomp_compress() or zs_malloc() error, or
 * -ENOMEM when zram_can_store_page() refuses the page.
 */
static int zram_write_page(struct zram *zram, struct page *page, u32 index)
{
	int ret = 0;
	unsigned long handle;
	unsigned int comp_len;
	void *mem;
	struct zcomp_strm *zstrm;
	unsigned long element;
	bool same_filled;

	mem = kmap_local_page(page);
	same_filled = page_same_filled(mem, &element);
	kunmap_local(mem);
	if (same_filled)
		return write_same_filled_page(zram, element, index);

	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	mem = kmap_local_page(page);
	ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
			     mem, &comp_len);
	kunmap_local(mem);

	if (unlikely(ret)) {
		zcomp_stream_put(zstrm);
		pr_err("Compression failed! err=%d\n", ret);
		return ret;
	}

	if (comp_len >= huge_class_size) {
		zcomp_stream_put(zstrm);
		return write_incompressible_page(zram, page, index);
	}

	handle = zs_malloc(zram->mem_pool, comp_len,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle);
	}

	if (!zram_can_store_page(zram)) {
		zcomp_stream_put(zstrm);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	/* The compressed data lives in the stream buffer until it is copied out */
	zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
	zcomp_stream_put(zstrm);

	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_handle(zram, index, handle);
	set_slot_size(zram, index, comp_len);
	slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	atomic64_add(comp_len, &zram->stats.compr_data_size);

	return ret;
}

/*
 * This is a partial IO. Read the full page before writing the changes.
 */
static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
				   u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;

	/* Read-modify-write through a bounce page */
	ret = zram_read_page(zram, page, index, NULL);
	if (!ret) {
		memcpy_from_bvec(page_address(page) + offset, bvec);
		ret = zram_write_page(zram, page, index);
	}
	__free_page(page);
	return ret;
}

/*
 * Write @bvec to slot @index. A partial bvec is merged into the current
 * slot content first; a full-page bvec is written directly.
 */
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
			   u32 index, int offset)
{
	if (is_partial_io(bvec))
		return zram_bvec_write_partial(zram, bvec, index, offset);
	return zram_write_page(zram, bvec->bv_page, index);
}

#ifdef CONFIG_ZRAM_MULTI_COMP
/* Recompression mode bits, used as slot filters by scan_slots_for_recompress() */
#define RECOMPRESS_IDLE		(1 << 0)
#define RECOMPRESS_HUGE		(1 << 1)

/*
 * Returns true when no algorithm is configured at any priority above
 * @prio.
 */
static bool highest_priority_algorithm(struct zram *zram, u32 prio)
{
	u32 p;

	for (p = prio + 1; p < ZRAM_MAX_COMPS; p++) {
		if (zram->comp_algs[p])
			return false;
	}

	return true;
}

/*
 * Walk all slots of the device and hand every recompression candidate to
 * place_pp_slot().
 *
 * A slot is skipped when it is not allocated, lacks a slot flag requested
 * by @mode, is ZRAM_WB, ZRAM_SAME or ZRAM_INCOMPRESSIBLE, or is already
 * compressed with priority @prio or higher. Each slot is examined under its
 * slot lock. The scan stops early as soon as place_pp_slot() returns false.
 */
static void scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio,
				      struct zram_pp_ctl *ctl)
{
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index;

	for (index = 0; index < nr_pages; index++) {
		bool ok = true;

		slot_lock(zram, index);
		if (!slot_allocated(zram, index))
			goto next;

		if (mode & RECOMPRESS_IDLE &&
		    !test_slot_flag(zram, index, ZRAM_IDLE))
			goto next;

		if (mode & RECOMPRESS_HUGE &&
		    !test_slot_flag(zram, index, ZRAM_HUGE))
			goto next;

		if (test_slot_flag(zram, index, ZRAM_WB) ||
		    test_slot_flag(zram, index, ZRAM_SAME) ||
		    test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		/* Already compressed with same or higher priority */
		if (get_slot_comp_priority(zram, index) >= prio)
			goto next;

		ok = place_pp_slot(zram, ctl, index);
next:
		slot_unlock(zram, index);
		if (!ok)
			break;
	}
}

/*
 * This function will decompress (unless it's ZRAM_HUGE) the page and then
 * attempt to compress it using provided compression algorithm priority
 * (which is potentially more effective).
 *
 * Corresponding ZRAM slot should be locked.
 *
 * @page is the scratch page that receives the decompressed data.
 * @num_recomp_pages is the remaining page budget; it is decremented (unless
 * already 0) for every compression attempt, successful or not. Objects
 * smaller than @threshold are left alone.
 *
 * Returns 0 when the slot was recompressed or deliberately left unchanged,
 * -EINVAL when the slot has no handle, or the error of the read,
 * compression or allocation step. The old object stays in place on every
 * path except a successful recompression.
 */
static int recompress_slot(struct zram *zram, u32 index, struct page *page,
			   u64 *num_recomp_pages, u32 threshold, u32 prio)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	void *src;
	int ret = 0;

	handle_old = get_slot_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = get_slot_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	/*
	 * We touched this entry so mark it as non-IDLE. This makes sure that
	 * we don't preserve IDLE flag and don't incorrectly pick this entry
	 * for different post-processing type (e.g. writeback).
	 */
	clear_slot_flag(zram, index, ZRAM_IDLE);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	src = kmap_local_page(page);
	ret = zcomp_compress(zram->comps[prio], zstrm, src, &comp_len_new);
	kunmap_local(src);

	/*
	 * Decrement the limit (if set) on pages we can recompress, even
	 * when current recompression was unsuccessful or did not compress
	 * the page below the threshold, because we still spent resources
	 * on it.
	 */
	if (*num_recomp_pages)
		*num_recomp_pages -= 1;

	if (ret) {
		zcomp_stream_put(zstrm);
		return ret;
	}

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
	class_index_new = zs_lookup_class_index(zram->mem_pool, comp_len_new);

	/* Keep the old object unless the new one lands in a lower size class */
	if (class_index_new >= class_index_old ||
	    (threshold && comp_len_new >= threshold)) {
		zcomp_stream_put(zstrm);

		/*
		 * Secondary algorithms failed to re-compress the page
		 * in a way that would save memory.
		 *
		 * Mark the object incompressible if the max-priority (the
		 * last configured one) algorithm couldn't re-compress it.
		 */
		if (highest_priority_algorithm(zram, prio))
			set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/*
	 * We are holding per-CPU stream mutex and entry lock so better
	 * avoid direct reclaim. Allocation error is not fatal since
	 * we still have the old object in the mem_pool.
	 *
	 * XXX: technically, the node we really want here is the node that
	 * holds the original compressed data. But that would require us to
	 * modify zsmalloc API to return this information. For now, we will
	 * make do with the node of the page allocated for recompression.
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       GFP_NOIO | __GFP_NOWARN |
			       __GFP_HIGHMEM | __GFP_MOVABLE,
			       page_to_nid(page));
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle_new);
	}

	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zstrm);

	/*
	 * slot_free() drops the old object and its accounting, so the stats
	 * are re-added for the new object below.
	 */
	slot_free(zram, index);
	set_slot_handle(zram, index, handle_new);
	set_slot_size(zram, index, comp_len_new);
	set_slot_comp_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

static ssize_t recompress_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char *args, *param, *val, *algo = NULL;
	u64 num_recomp_pages = ULLONG_MAX;
	struct zram_pp_ctl *ctl = NULL;
	s32 prio = ZRAM_SECONDARY_COMP;
	u32 mode = 0, threshold = 0;
	struct zram_pp_slot *pps;
	struct page *page = NULL;
	bool prio_param = false;
	ssize_t ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "type")) {
			if (!strcmp(val, "idle"))
				mode = RECOMPRESS_IDLE;
			if (!strcmp(val, "huge"))
				mode = RECOMPRESS_HUGE;
			if (!strcmp(val, "huge_idle"))
				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
			if (!mode)
				return -EINVAL;
			continue;
		}

		if (!strcmp(param, "max_pages")) {
			/*
			 * Limit the number of entries (pages) we attempt to
			 * recompress.
2567 */ 2568 ret = kstrtoull(val, 10, &num_recomp_pages); 2569 if (ret) 2570 return ret; 2571 continue; 2572 } 2573 2574 if (!strcmp(param, "threshold")) { 2575 /* 2576 * We will re-compress only idle objects equal or 2577 * greater in size than watermark. 2578 */ 2579 ret = kstrtouint(val, 10, &threshold); 2580 if (ret) 2581 return ret; 2582 continue; 2583 } 2584 2585 if (!strcmp(param, "algo")) { 2586 algo = val; 2587 continue; 2588 } 2589 2590 if (!strcmp(param, "priority")) { 2591 prio_param = true; 2592 ret = kstrtoint(val, 10, &prio); 2593 if (ret) 2594 return ret; 2595 continue; 2596 } 2597 } 2598 2599 if (threshold >= huge_class_size) 2600 return -EINVAL; 2601 2602 guard(rwsem_write)(&zram->dev_lock); 2603 if (!init_done(zram)) 2604 return -EINVAL; 2605 2606 if (prio_param) { 2607 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) 2608 return -EINVAL; 2609 } 2610 2611 if (algo && prio_param) { 2612 ret = validate_algo_priority(zram, algo, prio); 2613 if (ret) 2614 return ret; 2615 } 2616 2617 if (algo && !prio_param) { 2618 prio = lookup_algo_priority(zram, algo, ZRAM_SECONDARY_COMP); 2619 if (prio < 0) 2620 return -EINVAL; 2621 } 2622 2623 if (!zram->comps[prio]) 2624 return -EINVAL; 2625 2626 page = alloc_page(GFP_KERNEL); 2627 if (!page) { 2628 ret = -ENOMEM; 2629 goto out; 2630 } 2631 2632 ctl = init_pp_ctl(); 2633 if (!ctl) { 2634 ret = -ENOMEM; 2635 goto out; 2636 } 2637 2638 scan_slots_for_recompress(zram, mode, prio, ctl); 2639 2640 ret = len; 2641 while ((pps = select_pp_slot(ctl))) { 2642 int err = 0; 2643 2644 if (!num_recomp_pages) 2645 break; 2646 2647 slot_lock(zram, pps->index); 2648 if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT)) 2649 goto next; 2650 2651 err = recompress_slot(zram, pps->index, page, 2652 &num_recomp_pages, threshold, prio); 2653 next: 2654 slot_unlock(zram, pps->index); 2655 release_pp_slot(zram, pps); 2656 2657 if (err) { 2658 ret = err; 2659 break; 2660 } 2661 2662 cond_resched(); 2663 } 2664 2665 out: 2666 if 
(page) 2667 __free_page(page); 2668 release_pp_ctl(zram, ctl); 2669 return ret; 2670 } 2671 #endif 2672 2673 static void zram_bio_discard(struct zram *zram, struct bio *bio) 2674 { 2675 size_t n = bio->bi_iter.bi_size; 2676 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2677 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2678 SECTOR_SHIFT; 2679 2680 /* 2681 * zram manages data in physical block size units. Because logical block 2682 * size isn't identical with physical block size on some arch, we 2683 * could get a discard request pointing to a specific offset within a 2684 * certain physical block. Although we can handle this request by 2685 * reading that physiclal block and decompressing and partially zeroing 2686 * and re-compressing and then re-storing it, this isn't reasonable 2687 * because our intent with a discard request is to save memory. So 2688 * skipping this logical block is appropriate here. 2689 */ 2690 if (offset) { 2691 if (n <= (PAGE_SIZE - offset)) 2692 goto end_bio; 2693 2694 n -= (PAGE_SIZE - offset); 2695 index++; 2696 } 2697 2698 while (n >= PAGE_SIZE) { 2699 slot_lock(zram, index); 2700 slot_free(zram, index); 2701 slot_unlock(zram, index); 2702 atomic64_inc(&zram->stats.notify_free); 2703 index++; 2704 n -= PAGE_SIZE; 2705 } 2706 2707 end_bio: 2708 bio_endio(bio); 2709 } 2710 2711 static void zram_bio_read(struct zram *zram, struct bio *bio) 2712 { 2713 unsigned long start_time = bio_start_io_acct(bio); 2714 struct bvec_iter iter = bio->bi_iter; 2715 2716 do { 2717 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2718 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2719 SECTOR_SHIFT; 2720 struct bio_vec bv = bio_iter_iovec(bio, iter); 2721 2722 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2723 2724 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { 2725 atomic64_inc(&zram->stats.failed_reads); 2726 bio->bi_status = BLK_STS_IOERR; 2727 break; 2728 } 2729 
flush_dcache_page(bv.bv_page); 2730 2731 slot_lock(zram, index); 2732 mark_slot_accessed(zram, index); 2733 slot_unlock(zram, index); 2734 2735 bio_advance_iter_single(bio, &iter, bv.bv_len); 2736 } while (iter.bi_size); 2737 2738 bio_end_io_acct(bio, start_time); 2739 bio_endio(bio); 2740 } 2741 2742 static void zram_bio_write(struct zram *zram, struct bio *bio) 2743 { 2744 unsigned long start_time = bio_start_io_acct(bio); 2745 struct bvec_iter iter = bio->bi_iter; 2746 2747 do { 2748 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2749 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2750 SECTOR_SHIFT; 2751 struct bio_vec bv = bio_iter_iovec(bio, iter); 2752 2753 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2754 2755 if (zram_bvec_write(zram, &bv, index, offset) < 0) { 2756 atomic64_inc(&zram->stats.failed_writes); 2757 bio->bi_status = BLK_STS_IOERR; 2758 break; 2759 } 2760 2761 slot_lock(zram, index); 2762 mark_slot_accessed(zram, index); 2763 slot_unlock(zram, index); 2764 2765 bio_advance_iter_single(bio, &iter, bv.bv_len); 2766 } while (iter.bi_size); 2767 2768 bio_end_io_acct(bio, start_time); 2769 bio_endio(bio); 2770 } 2771 2772 /* 2773 * Handler function for all zram I/O requests. 
2774 */ 2775 static void zram_submit_bio(struct bio *bio) 2776 { 2777 struct zram *zram = bio->bi_bdev->bd_disk->private_data; 2778 2779 switch (bio_op(bio)) { 2780 case REQ_OP_READ: 2781 zram_bio_read(zram, bio); 2782 break; 2783 case REQ_OP_WRITE: 2784 zram_bio_write(zram, bio); 2785 break; 2786 case REQ_OP_DISCARD: 2787 case REQ_OP_WRITE_ZEROES: 2788 zram_bio_discard(zram, bio); 2789 break; 2790 default: 2791 WARN_ON_ONCE(1); 2792 bio_endio(bio); 2793 } 2794 } 2795 2796 static void zram_slot_free_notify(struct block_device *bdev, 2797 unsigned long index) 2798 { 2799 struct zram *zram; 2800 2801 zram = bdev->bd_disk->private_data; 2802 2803 atomic64_inc(&zram->stats.notify_free); 2804 if (!slot_trylock(zram, index)) { 2805 atomic64_inc(&zram->stats.miss_free); 2806 return; 2807 } 2808 2809 slot_free(zram, index); 2810 slot_unlock(zram, index); 2811 } 2812 2813 static void zram_comp_params_reset(struct zram *zram) 2814 { 2815 u32 prio; 2816 2817 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2818 comp_params_reset(zram, prio); 2819 } 2820 } 2821 2822 static void zram_destroy_comps(struct zram *zram) 2823 { 2824 u32 prio; 2825 2826 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2827 struct zcomp *comp = zram->comps[prio]; 2828 2829 zram->comps[prio] = NULL; 2830 if (!comp) 2831 continue; 2832 zcomp_destroy(comp); 2833 } 2834 2835 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) 2836 zram->comp_algs[prio] = NULL; 2837 2838 zram_comp_params_reset(zram); 2839 } 2840 2841 static void zram_reset_device(struct zram *zram) 2842 { 2843 guard(rwsem_write)(&zram->dev_lock); 2844 2845 zram->limit_pages = 0; 2846 2847 set_capacity_and_notify(zram->disk, 0); 2848 part_stat_set_all(zram->disk->part0, 0); 2849 2850 /* I/O operation under all of CPU are done so let's free */ 2851 zram_meta_free(zram, zram->disksize); 2852 zram->disksize = 0; 2853 zram_destroy_comps(zram); 2854 memset(&zram->stats, 0, sizeof(zram->stats)); 2855 
reset_bdev(zram); 2856 2857 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 2858 } 2859 2860 static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, 2861 const char *buf, size_t len) 2862 { 2863 u64 disksize; 2864 struct zcomp *comp; 2865 struct zram *zram = dev_to_zram(dev); 2866 int err; 2867 u32 prio; 2868 2869 disksize = memparse(buf, NULL); 2870 if (!disksize) 2871 return -EINVAL; 2872 2873 guard(rwsem_write)(&zram->dev_lock); 2874 if (init_done(zram)) { 2875 pr_info("Cannot change disksize for initialized device\n"); 2876 return -EBUSY; 2877 } 2878 2879 disksize = PAGE_ALIGN(disksize); 2880 if (!zram_meta_alloc(zram, disksize)) 2881 return -ENOMEM; 2882 2883 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2884 if (!zram->comp_algs[prio]) 2885 continue; 2886 2887 comp = zcomp_create(zram->comp_algs[prio], 2888 &zram->params[prio]); 2889 if (IS_ERR(comp)) { 2890 pr_err("Cannot initialise %s compressing backend\n", 2891 zram->comp_algs[prio]); 2892 err = PTR_ERR(comp); 2893 goto out_free_comps; 2894 } 2895 2896 zram->comps[prio] = comp; 2897 } 2898 zram->disksize = disksize; 2899 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); 2900 2901 return len; 2902 2903 out_free_comps: 2904 zram_destroy_comps(zram); 2905 zram_meta_free(zram, disksize); 2906 return err; 2907 } 2908 2909 static ssize_t reset_store(struct device *dev, 2910 struct device_attribute *attr, const char *buf, size_t len) 2911 { 2912 int ret; 2913 unsigned short do_reset; 2914 struct zram *zram; 2915 struct gendisk *disk; 2916 2917 ret = kstrtou16(buf, 10, &do_reset); 2918 if (ret) 2919 return ret; 2920 2921 if (!do_reset) 2922 return -EINVAL; 2923 2924 zram = dev_to_zram(dev); 2925 disk = zram->disk; 2926 2927 mutex_lock(&disk->open_mutex); 2928 /* Do not reset an active device or claimed device */ 2929 if (disk_openers(disk) || zram->claim) { 2930 mutex_unlock(&disk->open_mutex); 2931 return -EBUSY; 2932 } 2933 2934 /* 
From now on, anyone can't open /dev/zram[0-9] */ 2935 zram->claim = true; 2936 mutex_unlock(&disk->open_mutex); 2937 2938 /* Make sure all the pending I/O are finished */ 2939 sync_blockdev(disk->part0); 2940 zram_reset_device(zram); 2941 2942 mutex_lock(&disk->open_mutex); 2943 zram->claim = false; 2944 mutex_unlock(&disk->open_mutex); 2945 2946 return len; 2947 } 2948 2949 static int zram_open(struct gendisk *disk, blk_mode_t mode) 2950 { 2951 struct zram *zram = disk->private_data; 2952 2953 WARN_ON(!mutex_is_locked(&disk->open_mutex)); 2954 2955 /* zram was claimed to reset so open request fails */ 2956 if (zram->claim) 2957 return -EBUSY; 2958 return 0; 2959 } 2960 2961 static const struct block_device_operations zram_devops = { 2962 .open = zram_open, 2963 .submit_bio = zram_submit_bio, 2964 .swap_slot_free_notify = zram_slot_free_notify, 2965 .owner = THIS_MODULE 2966 }; 2967 2968 static DEVICE_ATTR_RO(io_stat); 2969 static DEVICE_ATTR_RO(mm_stat); 2970 static DEVICE_ATTR_RO(debug_stat); 2971 static DEVICE_ATTR_WO(compact); 2972 static DEVICE_ATTR_RW(disksize); 2973 static DEVICE_ATTR_RO(initstate); 2974 static DEVICE_ATTR_WO(reset); 2975 static DEVICE_ATTR_WO(mem_limit); 2976 static DEVICE_ATTR_WO(mem_used_max); 2977 static DEVICE_ATTR_WO(idle); 2978 static DEVICE_ATTR_RW(comp_algorithm); 2979 #ifdef CONFIG_ZRAM_WRITEBACK 2980 static DEVICE_ATTR_RO(bd_stat); 2981 static DEVICE_ATTR_RW(backing_dev); 2982 static DEVICE_ATTR_WO(writeback); 2983 static DEVICE_ATTR_RW(writeback_limit); 2984 static DEVICE_ATTR_RW(writeback_limit_enable); 2985 static DEVICE_ATTR_RW(writeback_batch_size); 2986 static DEVICE_ATTR_RW(compressed_writeback); 2987 #endif 2988 #ifdef CONFIG_ZRAM_MULTI_COMP 2989 static DEVICE_ATTR_RW(recomp_algorithm); 2990 static DEVICE_ATTR_WO(recompress); 2991 #endif 2992 static DEVICE_ATTR_WO(algorithm_params); 2993 2994 static struct attribute *zram_disk_attrs[] = { 2995 &dev_attr_disksize.attr, 2996 &dev_attr_initstate.attr, 2997 
&dev_attr_reset.attr, 2998 &dev_attr_compact.attr, 2999 &dev_attr_mem_limit.attr, 3000 &dev_attr_mem_used_max.attr, 3001 &dev_attr_idle.attr, 3002 &dev_attr_comp_algorithm.attr, 3003 #ifdef CONFIG_ZRAM_WRITEBACK 3004 &dev_attr_bd_stat.attr, 3005 &dev_attr_backing_dev.attr, 3006 &dev_attr_writeback.attr, 3007 &dev_attr_writeback_limit.attr, 3008 &dev_attr_writeback_limit_enable.attr, 3009 &dev_attr_writeback_batch_size.attr, 3010 &dev_attr_compressed_writeback.attr, 3011 #endif 3012 &dev_attr_io_stat.attr, 3013 &dev_attr_mm_stat.attr, 3014 &dev_attr_debug_stat.attr, 3015 #ifdef CONFIG_ZRAM_MULTI_COMP 3016 &dev_attr_recomp_algorithm.attr, 3017 &dev_attr_recompress.attr, 3018 #endif 3019 &dev_attr_algorithm_params.attr, 3020 NULL, 3021 }; 3022 3023 ATTRIBUTE_GROUPS(zram_disk); 3024 3025 /* 3026 * Allocate and initialize new zram device. the function returns 3027 * '>= 0' device_id upon success, and negative value otherwise. 3028 */ 3029 static int zram_add(void) 3030 { 3031 struct queue_limits lim = { 3032 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, 3033 /* 3034 * To ensure that we always get PAGE_SIZE aligned and 3035 * n*PAGE_SIZED sized I/O requests. 3036 */ 3037 .physical_block_size = PAGE_SIZE, 3038 .io_min = PAGE_SIZE, 3039 .io_opt = PAGE_SIZE, 3040 .max_hw_discard_sectors = UINT_MAX, 3041 /* 3042 * zram_bio_discard() will clear all logical blocks if logical 3043 * block size is identical with physical block size(PAGE_SIZE). 3044 * But if it is different, we will skip discarding some parts of 3045 * logical blocks in the part of the request range which isn't 3046 * aligned to physical block size. So we can't ensure that all 3047 * discarded logical blocks are zeroed. 
3048 */ 3049 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE 3050 .max_write_zeroes_sectors = UINT_MAX, 3051 #endif 3052 .features = BLK_FEAT_STABLE_WRITES | 3053 BLK_FEAT_SYNCHRONOUS, 3054 }; 3055 struct zram *zram; 3056 int ret, device_id; 3057 3058 zram = kzalloc_obj(struct zram); 3059 if (!zram) 3060 return -ENOMEM; 3061 3062 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 3063 if (ret < 0) 3064 goto out_free_dev; 3065 device_id = ret; 3066 3067 init_rwsem(&zram->dev_lock); 3068 #ifdef CONFIG_ZRAM_WRITEBACK 3069 zram->wb_batch_size = 32; 3070 zram->compressed_wb = false; 3071 #endif 3072 3073 /* gendisk structure */ 3074 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); 3075 if (IS_ERR(zram->disk)) { 3076 pr_err("Error allocating disk structure for device %d\n", 3077 device_id); 3078 ret = PTR_ERR(zram->disk); 3079 goto out_free_idr; 3080 } 3081 3082 zram->disk->major = zram_major; 3083 zram->disk->first_minor = device_id; 3084 zram->disk->minors = 1; 3085 zram->disk->flags |= GENHD_FL_NO_PART; 3086 zram->disk->fops = &zram_devops; 3087 zram->disk->private_data = zram; 3088 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 3089 zram_comp_params_reset(zram); 3090 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 3091 3092 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */ 3093 set_capacity(zram->disk, 0); 3094 ret = device_add_disk(NULL, zram->disk, zram_disk_groups); 3095 if (ret) 3096 goto out_cleanup_disk; 3097 3098 zram_debugfs_register(zram); 3099 pr_info("Added device: %s\n", zram->disk->disk_name); 3100 return device_id; 3101 3102 out_cleanup_disk: 3103 put_disk(zram->disk); 3104 out_free_idr: 3105 idr_remove(&zram_index_idr, device_id); 3106 out_free_dev: 3107 kfree(zram); 3108 return ret; 3109 } 3110 3111 static int zram_remove(struct zram *zram) 3112 { 3113 bool claimed; 3114 3115 mutex_lock(&zram->disk->open_mutex); 3116 if (disk_openers(zram->disk)) { 3117 mutex_unlock(&zram->disk->open_mutex); 3118 return 
-EBUSY; 3119 } 3120 3121 claimed = zram->claim; 3122 if (!claimed) 3123 zram->claim = true; 3124 mutex_unlock(&zram->disk->open_mutex); 3125 3126 zram_debugfs_unregister(zram); 3127 3128 if (claimed) { 3129 /* 3130 * If we were claimed by reset_store(), del_gendisk() will 3131 * wait until reset_store() is done, so nothing need to do. 3132 */ 3133 ; 3134 } else { 3135 /* Make sure all the pending I/O are finished */ 3136 sync_blockdev(zram->disk->part0); 3137 zram_reset_device(zram); 3138 } 3139 3140 pr_info("Removed device: %s\n", zram->disk->disk_name); 3141 3142 del_gendisk(zram->disk); 3143 3144 /* del_gendisk drains pending reset_store */ 3145 WARN_ON_ONCE(claimed && zram->claim); 3146 3147 /* 3148 * disksize_store() may be called in between zram_reset_device() 3149 * and del_gendisk(), so run the last reset to avoid leaking 3150 * anything allocated with disksize_store() 3151 */ 3152 zram_reset_device(zram); 3153 3154 put_disk(zram->disk); 3155 kfree(zram); 3156 return 0; 3157 } 3158 3159 /* zram-control sysfs attributes */ 3160 3161 /* 3162 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a 3163 * sense that reading from this file does alter the state of your system -- it 3164 * creates a new un-initialized zram device and returns back this device's 3165 * device_id (or an error code if it fails to create a new device). 
3166 */ 3167 static ssize_t hot_add_show(const struct class *class, 3168 const struct class_attribute *attr, 3169 char *buf) 3170 { 3171 int ret; 3172 3173 mutex_lock(&zram_index_mutex); 3174 ret = zram_add(); 3175 mutex_unlock(&zram_index_mutex); 3176 3177 if (ret < 0) 3178 return ret; 3179 return sysfs_emit(buf, "%d\n", ret); 3180 } 3181 /* This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used */ 3182 static struct class_attribute class_attr_hot_add = 3183 __ATTR(hot_add, 0400, hot_add_show, NULL); 3184 3185 static ssize_t hot_remove_store(const struct class *class, 3186 const struct class_attribute *attr, 3187 const char *buf, 3188 size_t count) 3189 { 3190 struct zram *zram; 3191 int ret, dev_id; 3192 3193 /* dev_id is gendisk->first_minor, which is `int' */ 3194 ret = kstrtoint(buf, 10, &dev_id); 3195 if (ret) 3196 return ret; 3197 if (dev_id < 0) 3198 return -EINVAL; 3199 3200 mutex_lock(&zram_index_mutex); 3201 3202 zram = idr_find(&zram_index_idr, dev_id); 3203 if (zram) { 3204 ret = zram_remove(zram); 3205 if (!ret) 3206 idr_remove(&zram_index_idr, dev_id); 3207 } else { 3208 ret = -ENODEV; 3209 } 3210 3211 mutex_unlock(&zram_index_mutex); 3212 return ret ? 
ret : count; 3213 } 3214 static CLASS_ATTR_WO(hot_remove); 3215 3216 static struct attribute *zram_control_class_attrs[] = { 3217 &class_attr_hot_add.attr, 3218 &class_attr_hot_remove.attr, 3219 NULL, 3220 }; 3221 ATTRIBUTE_GROUPS(zram_control_class); 3222 3223 static struct class zram_control_class = { 3224 .name = "zram-control", 3225 .class_groups = zram_control_class_groups, 3226 }; 3227 3228 static int zram_remove_cb(int id, void *ptr, void *data) 3229 { 3230 WARN_ON_ONCE(zram_remove(ptr)); 3231 return 0; 3232 } 3233 3234 static void destroy_devices(void) 3235 { 3236 class_unregister(&zram_control_class); 3237 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 3238 zram_debugfs_destroy(); 3239 idr_destroy(&zram_index_idr); 3240 unregister_blkdev(zram_major, "zram"); 3241 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3242 } 3243 3244 static int __init zram_init(void) 3245 { 3246 struct zram_table_entry zram_te; 3247 int ret; 3248 3249 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8); 3250 3251 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 3252 zcomp_cpu_up_prepare, zcomp_cpu_dead); 3253 if (ret < 0) 3254 return ret; 3255 3256 ret = class_register(&zram_control_class); 3257 if (ret) { 3258 pr_err("Unable to register zram-control class\n"); 3259 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3260 return ret; 3261 } 3262 3263 zram_debugfs_create(); 3264 zram_major = register_blkdev(0, "zram"); 3265 if (zram_major <= 0) { 3266 pr_err("Unable to get major number\n"); 3267 class_unregister(&zram_control_class); 3268 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3269 return -EBUSY; 3270 } 3271 3272 while (num_devices != 0) { 3273 mutex_lock(&zram_index_mutex); 3274 ret = zram_add(); 3275 mutex_unlock(&zram_index_mutex); 3276 if (ret < 0) 3277 goto out_error; 3278 num_devices--; 3279 } 3280 3281 return 0; 3282 3283 out_error: 3284 destroy_devices(); 3285 return ret; 3286 } 3287 3288 static void __exit zram_exit(void) 
3289 { 3290 destroy_devices(); 3291 } 3292 3293 module_init(zram_init); 3294 module_exit(zram_exit); 3295 3296 module_param(num_devices, uint, 0); 3297 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 3298 3299 MODULE_LICENSE("Dual BSD/GPL"); 3300 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 3301 MODULE_DESCRIPTION("Compressed RAM Block Device"); 3302