/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the license that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define pr_fmt(fmt) "zram: " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>
#include <linux/kernel_read_file.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

#define ZRAM_MAX_ALGO_NAME_SZ	128

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;

static void slot_free(struct zram *zram, u32 index);
#define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)

static void slot_lock_init(struct zram *zram, u32 index)
{
	static struct lock_class_key __key;

	lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
			 &__key, 0);
}

/*
 * entry locking rules:
 *
 * 1) Lock is exclusive
 *
 * 2) lock() function can sleep waiting for the lock
 *
 * 3) Lock owner can sleep
 *
 * 4) Use TRY lock variant when in atomic context
 *    - must check return value and handle locking failures
 */
static __must_check bool slot_trylock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
		mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
		lock_acquired(slot_dep_map(zram, index), _RET_IP_);
		return true;
	}

	return false;
}

static void slot_lock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
	wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
	lock_acquired(slot_dep_map(zram, index), _RET_IP_);
}

static void slot_unlock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	mutex_release(slot_dep_map(zram, index), _RET_IP_);
	clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
}
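
/*
 * Illustrative sketch only (not part of the driver): the typical slot
 * locking patterns implied by the rules above, for a hypothetical caller.
 * The sleepable path takes the lock unconditionally; atomic context must
 * use the TRY variant and handle failure.
 *
 *	// preemptible context: may sleep waiting for the lock
 *	slot_lock(zram, index);
 *	... access zram->table[index] ...
 *	slot_unlock(zram, index);
 *
 *	// atomic context: must not sleep, so only try-lock is allowed
 *	if (slot_trylock(zram, index)) {
 *		... access zram->table[index] ...
 *		slot_unlock(zram, index);
 *	} else {
 *		... back off: skip the slot or retry later ...
 *	}
 */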

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long get_slot_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void set_slot_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

static bool test_slot_flag(struct zram *zram, u32 index,
			   enum zram_pageflags flag)
{
	return zram->table[index].attr.flags & BIT(flag);
}

static void set_slot_flag(struct zram *zram, u32 index,
			  enum zram_pageflags flag)
{
	zram->table[index].attr.flags |= BIT(flag);
}

static void clear_slot_flag(struct zram *zram, u32 index,
			    enum zram_pageflags flag)
{
	zram->table[index].attr.flags &= ~BIT(flag);
}

static size_t get_slot_size(struct zram *zram, u32 index)
{
	return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void set_slot_size(struct zram *zram, u32 index, size_t size)
{
	unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline bool slot_allocated(struct zram *zram, u32 index)
{
	return get_slot_size(zram, index) ||
	       test_slot_flag(zram, index, ZRAM_SAME) ||
	       test_slot_flag(zram, index, ZRAM_WB);
}

static inline void set_slot_comp_priority(struct zram *zram, u32 index,
					  u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Clear the previous priority value first, in case we recompress
	 * an already recompressed page.
	 */
	zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
					   ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}

static inline u32 get_slot_comp_priority(struct zram *zram, u32 index)
{
	u32 prio = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1;

	return prio & ZRAM_COMP_PRIORITY_MASK;
}
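
/*
 * Illustrative layout note: ->attr.flags packs the compressed object size
 * into the low ZRAM_FLAG_SHIFT bits, with the zram_pageflags and the
 * compression priority stored in the bits above. Assuming 4K pages and
 * ZRAM_FLAG_SHIFT == PAGE_SHIFT + 1 (13 bits, enough to encode sizes up
 * to PAGE_SIZE; the actual value lives in zram_drv.h),
 * set_slot_size(zram, index, 100) rewrites only the low 13 bits:
 *
 *	flags = ((flags >> 13) << 13) | 100;	// set size, keep flag bits
 *	size  = flags & (BIT(13) - 1);		// get_slot_size() == 100
 */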

static void mark_slot_accessed(struct zram *zram, u32 index)
{
	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds();
#endif
}

static inline void update_used_max(struct zram *zram, const unsigned long pages)
{
	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		if (cur_max >= pages)
			return;
	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
					  &cur_max, pages));
}

static bool zram_can_store_page(struct zram *zram)
{
	unsigned long alloced_pages;

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	return !zram->limit_pages || alloced_pages <= zram->limit_pages;
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#define ZRAM_PARTIAL_IO	1
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
struct zram_pp_slot {
	unsigned long index;
	struct list_head entry;
};

/*
 * A post-processing bucket is, essentially, a size class; this defines
 * the range (in bytes) of pp-slot sizes in a particular bucket.
 */
#define PP_BUCKET_SIZE_RANGE	64
#define NUM_PP_BUCKETS		((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)

struct zram_pp_ctl {
	struct list_head pp_buckets[NUM_PP_BUCKETS];
};

static struct zram_pp_ctl *init_pp_ctl(void)
{
	struct zram_pp_ctl *ctl;
	u32 idx;

	ctl = kmalloc_obj(*ctl);
	if (!ctl)
		return NULL;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
		INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
	return ctl;
}

static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
{
	list_del_init(&pps->entry);

	slot_lock(zram, pps->index);
	clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
	slot_unlock(zram, pps->index);

	kfree(pps);
}

static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
{
	u32 idx;

	if (!ctl)
		return;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
		while (!list_empty(&ctl->pp_buckets[idx])) {
			struct zram_pp_slot *pps;

			pps = list_first_entry(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
			release_pp_slot(zram, pps);
		}
	}

	kfree(ctl);
}

static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
			  u32 index)
{
	struct zram_pp_slot *pps;
	u32 bid;

	pps = kmalloc_obj(*pps, GFP_NOIO | __GFP_NOWARN);
	if (!pps)
		return false;

	INIT_LIST_HEAD(&pps->entry);
	pps->index = index;

	bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
	list_add(&pps->entry, &ctl->pp_buckets[bid]);

	set_slot_flag(zram, pps->index, ZRAM_PP_SLOT);
	return true;
}

static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
{
	struct zram_pp_slot *pps = NULL;
	s32 idx = NUM_PP_BUCKETS - 1;

	/* The higher the bucket id the more optimal slot post-processing is */
	while (idx >= 0) {
		pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
		if (pps)
			break;

		idx--;
	}
	return pps;
}
#endif
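
/*
 * Worked example (illustrative only): with 4K pages and
 * PP_BUCKET_SIZE_RANGE == 64 there are 65 buckets. A slot whose compressed
 * size is 1000 bytes lands in bucket 1000 / 64 == 15, while an
 * incompressible PAGE_SIZE slot lands in bucket 64. select_pp_slot()
 * scans from the highest bucket down, so the largest objects (which free
 * the most memory when post-processed) are picked first.
 */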

static inline void zram_fill_page(void *ptr, unsigned long len,
				  unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = init_done(zram);

	return sysfs_emit(buf, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sysfs_emit(buf, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
			       struct device_attribute *attr, const char *buf,
			       size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	guard(rwsem_read)(&zram->dev_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}

	return len;
}

/*
 * Mark all pages which are older than or equal to cutoff as IDLE.
 * Callers should hold the zram init lock in read mode.
 */
static void mark_idle(struct zram *zram, ktime_t cutoff)
{
	int is_idle = 1;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
		 * post-processing (recompress, writeback) happens to the
		 * ZRAM_SAME slot.
		 *
		 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
		 */
		slot_lock(zram, index);
		if (!slot_allocated(zram, index) ||
		    test_slot_flag(zram, index, ZRAM_WB) ||
		    test_slot_flag(zram, index, ZRAM_SAME)) {
			slot_unlock(zram, index);
			continue;
		}

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
		is_idle = !cutoff ||
			ktime_after(cutoff, zram->table[index].attr.ac_time);
#endif
		if (is_idle)
			set_slot_flag(zram, index, ZRAM_IDLE);
		else
			clear_slot_flag(zram, index, ZRAM_IDLE);
		slot_unlock(zram, index);
	}
}

static ssize_t idle_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	ktime_t cutoff = 0;

	if (!sysfs_streq(buf, "all")) {
		/*
		 * If it did not parse as 'all' try to treat it as an integer
		 * when we have memory tracking enabled.
		 */
		u32 age_sec;

		if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) &&
		    !kstrtouint(buf, 0, &age_sec))
			cutoff = ktime_sub((u32)ktime_get_boottime_seconds(),
					   age_sec);
		else
			return -EINVAL;
	}

	guard(rwsem_read)(&zram->dev_lock);
	if (!init_done(zram))
		return -EINVAL;

	/*
	 * A cutoff of 0 marks everything as idle; this is the
	 * "all" behavior.
	 */
	mark_idle(zram, cutoff);
	return len;
}
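
/*
 * Usage sketch (from user space; the device name zram0 is an example):
 *
 *	# mark every eligible slot as idle
 *	echo all > /sys/block/zram0/idle
 *
 *	# with CONFIG_ZRAM_TRACK_ENTRY_ACTIME: mark only slots that were
 *	# not accessed for the last 300 seconds
 *	echo 300 > /sys/block/zram0/idle
 */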

#ifdef CONFIG_ZRAM_WRITEBACK
#define INVALID_BDEV_BLOCK	(~0UL)

static int read_from_zspool_raw(struct zram *zram, struct page *page,
				u32 index);
static int read_from_zspool(struct zram *zram, struct page *page, u32 index);

struct zram_wb_ctl {
	/* idle list is accessed only by the writeback task, no concurrency */
	struct list_head idle_reqs;
	/* done list is accessed concurrently, protect by done_lock */
	struct list_head done_reqs;
	wait_queue_head_t done_wait;
	spinlock_t done_lock;
	atomic_t num_inflight;
};

struct zram_wb_req {
	unsigned long blk_idx;
	struct page *page;
	struct zram_pp_slot *pps;
	struct bio_vec bio_vec;
	struct bio bio;

	struct list_head entry;
};

struct zram_rb_req {
	struct work_struct work;
	struct zram *zram;
	struct page *page;
	/* The read bio for backing device */
	struct bio *bio;
	unsigned long blk_idx;
	union {
		/* The original bio to complete (async read) */
		struct bio *parent;
		/* error status (sync read) */
		int error;
	};
	u32 index;
};

#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	ret = sysfs_emit(buf,
			 "%8llu %8llu %8llu\n",
			 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));

	return ret;
}

static ssize_t compressed_writeback_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	bool val;

	if (kstrtobool(buf, &val))
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram))
		return -EBUSY;

	zram->compressed_wb = val;

	return len;
}

static ssize_t compressed_writeback_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = zram->compressed_wb;

	return sysfs_emit(buf, "%d\n", val);
}

static ssize_t writeback_limit_enable_store(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;

	if (kstrtoull(buf, 10, &val))
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	zram->wb_limit_enable = val;

	return len;
}

static ssize_t writeback_limit_enable_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = zram->wb_limit_enable;

	return sysfs_emit(buf, "%d\n", val);
}

static ssize_t writeback_limit_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;

	if (kstrtoull(buf, 10, &val))
		return -EINVAL;

	/*
	 * When the page size is greater than 4KB, a bd_wb_limit value that
	 * is not page-size aligned causes value wrapping. For example, with
	 * a 16KB page size and bd_wb_limit set to 3, a single writeback
	 * operation takes bd_wb_limit to -1, and since bd_wb_limit is
	 * unsigned the value wraps around.
	 */
	val = rounddown(val, PAGE_SIZE / 4096);

	guard(rwsem_write)(&zram->dev_lock);
	zram->bd_wb_limit = val;

	return len;
}
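
/*
 * Worked example (illustrative only): the limit is accounted in 4K units,
 * PAGE_SIZE / 4096 units per written page. With 16KB pages a single
 * writeback consumes 4 units, so rounddown() snaps the requested limit to
 * a multiple of 4: a request of 7 becomes 4, and a request of 3 becomes 0
 * (no writeback budget at all).
 */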

static ssize_t writeback_limit_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = zram->bd_wb_limit;

	return sysfs_emit(buf, "%llu\n", val);
}

static ssize_t writeback_batch_size_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u32 val;

	if (kstrtouint(buf, 10, &val))
		return -EINVAL;

	if (!val)
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	zram->wb_batch_size = val;

	return len;
}

static ssize_t writeback_batch_size_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	val = zram->wb_batch_size;

	return sysfs_emit(buf, "%u\n", val);
}

static void reset_bdev(struct zram *zram)
{
	if (!zram->backing_dev)
		return;

	/* hope filp_close() flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p))
		return PTR_ERR(p);

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
	return ret;
}

static ssize_t backing_dev_store(struct device *dev,
				 struct device_attribute *attr, const char *buf,
				 size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strscpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	inode = backing_dev->f_mapping->host;

	/* Only block devices are supported at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	/* Refuse to use zero sized device (also prevents self reference) */
	if (!nr_pages) {
		err = -EINVAL;
		goto out;
	}

	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = I_BDEV(inode);
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	kfree(file_name);

	return err;
}
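
/*
 * Usage sketch (from user space; device and partition names are examples):
 * the backing device must be set up while the zram device is still
 * uninitialized, i.e. before disksize is written.
 *
 *	echo /dev/nvme0n1p2 > /sys/block/zram0/backing_dev
 *	echo 1G > /sys/block/zram0/disksize
 *	cat /sys/block/zram0/backing_dev
 */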

static unsigned long zram_reserve_bdev_block(struct zram *zram)
{
	unsigned long blk_idx;

	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0);
	if (blk_idx == zram->nr_pages)
		return INVALID_BDEV_BLOCK;

	set_bit(blk_idx, zram->bitmap);
	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}

static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}

static void release_wb_req(struct zram_wb_req *req)
{
	__free_page(req->page);
	kfree(req);
}

static void release_wb_ctl(struct zram_wb_ctl *wb_ctl)
{
	if (!wb_ctl)
		return;

	/* We should never have inflight requests at this point */
	WARN_ON(atomic_read(&wb_ctl->num_inflight));
	WARN_ON(!list_empty(&wb_ctl->done_reqs));

	while (!list_empty(&wb_ctl->idle_reqs)) {
		struct zram_wb_req *req;

		req = list_first_entry(&wb_ctl->idle_reqs,
				       struct zram_wb_req, entry);
		list_del(&req->entry);
		release_wb_req(req);
	}

	kfree(wb_ctl);
}

static struct zram_wb_ctl *init_wb_ctl(struct zram *zram)
{
	struct zram_wb_ctl *wb_ctl;
	int i;

	wb_ctl = kmalloc_obj(*wb_ctl);
	if (!wb_ctl)
		return NULL;

	INIT_LIST_HEAD(&wb_ctl->idle_reqs);
	INIT_LIST_HEAD(&wb_ctl->done_reqs);
	atomic_set(&wb_ctl->num_inflight, 0);
	init_waitqueue_head(&wb_ctl->done_wait);
	spin_lock_init(&wb_ctl->done_lock);

	for (i = 0; i < zram->wb_batch_size; i++) {
		struct zram_wb_req *req;

		/*
		 * This is a fatal condition only if we couldn't allocate
		 * any requests at all. Otherwise we just work with the
		 * requests that we have successfully allocated, so that
		 * writeback can still proceed, even if there is only one
		 * request on the idle list.
		 */
		req = kzalloc_obj(*req, GFP_KERNEL | __GFP_NOWARN);
		if (!req)
			break;

		req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN);
		if (!req->page) {
			kfree(req);
			break;
		}

		list_add(&req->entry, &wb_ctl->idle_reqs);
	}

	/* We couldn't allocate any requests, so writeback is not possible */
	if (list_empty(&wb_ctl->idle_reqs))
		goto release_wb_ctl;

	return wb_ctl;

release_wb_ctl:
	release_wb_ctl(wb_ctl);
	return NULL;
}

static void zram_account_writeback_rollback(struct zram *zram)
{
	lockdep_assert_held_write(&zram->dev_lock);

	if (zram->wb_limit_enable)
		zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12);
}

static void zram_account_writeback_submit(struct zram *zram)
{
	lockdep_assert_held_write(&zram->dev_lock);

	if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
		zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
}

static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
{
	u32 index = req->pps->index;
	int err;

	err = blk_status_to_errno(req->bio.bi_status);
	if (err) {
		/*
		 * Failed wb requests should not be accounted in wb_limit
		 * (if enabled).
		 */
		zram_account_writeback_rollback(zram);
		zram_release_bdev_block(zram, req->blk_idx);
		return err;
	}

	atomic64_inc(&zram->stats.bd_writes);
	slot_lock(zram, index);
	/*
	 * We release the slot lock during writeback, so the slot can change
	 * under us: either slot_free(), or slot_free() followed by
	 * zram_write_page(). In both cases the slot loses the ZRAM_PP_SLOT
	 * flag. No concurrent post-processing can set ZRAM_PP_SLOT on such
	 * slots until the current post-processing finishes.
	 */
	if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) {
		zram_release_bdev_block(zram, req->blk_idx);
		goto out;
	}

	clear_slot_flag(zram, index, ZRAM_IDLE);
	if (test_slot_flag(zram, index, ZRAM_HUGE))
		atomic64_dec(&zram->stats.huge_pages);
	atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size);
	zs_free(zram->mem_pool, get_slot_handle(zram, index));
	set_slot_handle(zram, index, req->blk_idx);
	set_slot_flag(zram, index, ZRAM_WB);

out:
	slot_unlock(zram, index);
	return 0;
}
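
/*
 * Request lifecycle overview (illustrative summary): each pre-allocated
 * zram_wb_req cycles through three states. It starts on the idle list,
 * moves in-flight when zram_submit_wb_request() hands its bio to the
 * block layer, is parked on the done list by zram_writeback_endio() from
 * IRQ context, and finally returns to the idle list once
 * zram_complete_done_reqs() has processed the completion.
 */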

static void zram_writeback_endio(struct bio *bio)
{
	struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio);
	struct zram_wb_ctl *wb_ctl = bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&wb_ctl->done_lock, flags);
	list_add(&req->entry, &wb_ctl->done_reqs);
	spin_unlock_irqrestore(&wb_ctl->done_lock, flags);

	wake_up(&wb_ctl->done_wait);
}

static void zram_submit_wb_request(struct zram *zram,
				   struct zram_wb_ctl *wb_ctl,
				   struct zram_wb_req *req)
{
	/*
	 * wb_limit (if enabled) should be adjusted before submission,
	 * so that we don't over-submit.
	 */
	zram_account_writeback_submit(zram);
	atomic_inc(&wb_ctl->num_inflight);
	req->bio.bi_private = wb_ctl;
	submit_bio(&req->bio);
}

static int zram_complete_done_reqs(struct zram *zram,
				   struct zram_wb_ctl *wb_ctl)
{
	struct zram_wb_req *req;
	unsigned long flags;
	int ret = 0, err;

	while (atomic_read(&wb_ctl->num_inflight) > 0) {
		spin_lock_irqsave(&wb_ctl->done_lock, flags);
		req = list_first_entry_or_null(&wb_ctl->done_reqs,
					       struct zram_wb_req, entry);
		if (req)
			list_del(&req->entry);
		spin_unlock_irqrestore(&wb_ctl->done_lock, flags);

		/* ->num_inflight > 0 doesn't mean we have done requests */
		if (!req)
			break;

		err = zram_writeback_complete(zram, req);
		if (err)
			ret = err;

		atomic_dec(&wb_ctl->num_inflight);
		release_pp_slot(zram, req->pps);
		req->pps = NULL;

		list_add(&req->entry, &wb_ctl->idle_reqs);
	}

	return ret;
}

static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl)
{
	struct zram_wb_req *req;

	req = list_first_entry_or_null(&wb_ctl->idle_reqs,
				       struct zram_wb_req, entry);
	if (req)
		list_del(&req->entry);
	return req;
}

static int zram_writeback_slots(struct zram *zram,
				struct zram_pp_ctl *ctl,
				struct zram_wb_ctl *wb_ctl)
{
	unsigned long blk_idx = INVALID_BDEV_BLOCK;
	struct zram_wb_req *req = NULL;
	struct zram_pp_slot *pps;
	int ret = 0, err = 0;
	u32 index = 0;

	while ((pps = select_pp_slot(ctl))) {
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			ret = -EIO;
			break;
		}

		while (!req) {
			req = zram_select_idle_req(wb_ctl);
			if (req)
				break;

			wait_event(wb_ctl->done_wait,
				   !list_empty(&wb_ctl->done_reqs));

			err = zram_complete_done_reqs(zram, wb_ctl);
			/*
			 * BIO errors are not fatal; we continue and simply
			 * attempt to write back the remaining objects (pages).
			 * At the same time we need to signal user-space that
			 * some writes (at least one, but possibly all of
			 * them) were not successful, and we do so by
			 * returning the most recent BIO error.
			 */
			if (err)
				ret = err;
		}

		if (blk_idx == INVALID_BDEV_BLOCK) {
			blk_idx = zram_reserve_bdev_block(zram);
			if (blk_idx == INVALID_BDEV_BLOCK) {
				ret = -ENOSPC;
				break;
			}
		}

		index = pps->index;
		slot_lock(zram, index);
		/*
		 * scan_slots() sets ZRAM_PP_SLOT and releases the slot lock,
		 * so slots can change in the meantime. If slots are accessed
		 * or freed they lose the ZRAM_PP_SLOT flag and hence we don't
		 * post-process them.
		 */
		if (!test_slot_flag(zram, index, ZRAM_PP_SLOT))
			goto next;
		if (zram->compressed_wb)
			err = read_from_zspool_raw(zram, req->page, index);
		else
			err = read_from_zspool(zram, req->page, index);
		if (err)
			goto next;
		slot_unlock(zram, index);

		/*
		 * From now on the pp-slot is owned by the req; remove it
		 * from its pp bucket.
		 */
		list_del_init(&pps->entry);

		req->blk_idx = blk_idx;
		req->pps = pps;
		bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE);
		req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
		req->bio.bi_end_io = zram_writeback_endio;
		__bio_add_page(&req->bio, req->page, PAGE_SIZE, 0);

		zram_submit_wb_request(zram, wb_ctl, req);
		blk_idx = INVALID_BDEV_BLOCK;
		req = NULL;
		cond_resched();
		continue;

next:
		slot_unlock(zram, index);
		release_pp_slot(zram, pps);
	}

	/*
	 * We selected an idle req, but never submitted it due to some
	 * error or the wb limit.
	 */
	if (req)
		release_wb_req(req);

	while (atomic_read(&wb_ctl->num_inflight) > 0) {
		wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs));
		err = zram_complete_done_reqs(zram, wb_ctl);
		if (err)
			ret = err;
	}

	return ret;
}

#define PAGE_WRITEBACK			0
#define HUGE_WRITEBACK			(1 << 0)
#define IDLE_WRITEBACK			(1 << 1)
#define INCOMPRESSIBLE_WRITEBACK	(1 << 2)

static int parse_page_index(char *val, unsigned long nr_pages,
			    unsigned long *lo, unsigned long *hi)
{
	int ret;

	ret = kstrtoul(val, 10, lo);
	if (ret)
		return ret;
	if (*lo >= nr_pages)
		return -ERANGE;
	*hi = *lo + 1;
	return 0;
}

static int parse_page_indexes(char *val, unsigned long nr_pages,
			      unsigned long *lo, unsigned long *hi)
{
	char *delim;
	int ret;

	delim = strchr(val, '-');
	if (!delim)
		return -EINVAL;

	*delim = 0x00;
	ret = kstrtoul(val, 10, lo);
	if (ret)
		return ret;
	if (*lo >= nr_pages)
		return -ERANGE;

	ret = kstrtoul(delim + 1, 10, hi);
	if (ret)
		return ret;
	if (*hi >= nr_pages || *lo > *hi)
		return -ERANGE;
	*hi += 1;
	return 0;
}

static int parse_mode(char *val, u32 *mode)
{
	*mode = 0;

	if (!strcmp(val, "idle"))
		*mode = IDLE_WRITEBACK;
	if (!strcmp(val, "huge"))
		*mode = HUGE_WRITEBACK;
	if (!strcmp(val, "huge_idle"))
		*mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
	if (!strcmp(val, "incompressible"))
		*mode = INCOMPRESSIBLE_WRITEBACK;

	if (*mode == 0)
		return -EINVAL;
	return 0;
}
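
/*
 * Worked examples (illustrative only): the parsers produce a half-open
 * [lo, hi) range of page indexes.
 *
 *	parse_page_index("42", ...)	-> lo = 42, hi = 43
 *	parse_page_indexes("5-10", ...)	-> lo = 5, hi = 11 (inclusive "5-10")
 *	parse_mode("huge_idle", &mode)	-> mode = IDLE_WRITEBACK | HUGE_WRITEBACK
 */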

static int scan_slots_for_writeback(struct zram *zram, u32 mode,
				    unsigned long lo, unsigned long hi,
				    struct zram_pp_ctl *ctl)
{
	u32 index = lo;

	while (index < hi) {
		bool ok = true;

		slot_lock(zram, index);
		if (!slot_allocated(zram, index))
			goto next;

		if (test_slot_flag(zram, index, ZRAM_WB) ||
		    test_slot_flag(zram, index, ZRAM_SAME))
			goto next;

		if (mode & IDLE_WRITEBACK &&
		    !test_slot_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode & HUGE_WRITEBACK &&
		    !test_slot_flag(zram, index, ZRAM_HUGE))
			goto next;
		if (mode & INCOMPRESSIBLE_WRITEBACK &&
		    !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		ok = place_pp_slot(zram, ctl, index);
next:
		slot_unlock(zram, index);
		if (!ok)
			break;
		index++;
	}

	return 0;
}

static ssize_t writeback_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long lo = 0, hi = nr_pages;
	struct zram_pp_ctl *pp_ctl = NULL;
	struct zram_wb_ctl *wb_ctl = NULL;
	char *args, *param, *val;
	ssize_t ret = len;
	int err, mode = 0;

	guard(rwsem_write)(&zram->dev_lock);
	if (!init_done(zram))
		return -EINVAL;

	if (!zram->backing_dev)
		return -ENODEV;

	pp_ctl = init_pp_ctl();
	if (!pp_ctl)
		return -ENOMEM;

	wb_ctl = init_wb_ctl(zram);
	if (!wb_ctl) {
		ret = -ENOMEM;
		goto out;
	}

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		/*
		 * Workaround to support the old writeback interface.
		 *
		 * The old writeback interface has a minor inconsistency and
		 * requires key=value only for the page_index parameter, while
		 * the writeback mode is a valueless parameter.
		 *
		 * This is not the case anymore and now all parameters are
		 * required to have values, however, we need to support the
		 * legacy writeback interface format so we check if we can
		 * recognize a valueless parameter as the (legacy) writeback
		 * mode.
		 */
		if (!val || !*val) {
			err = parse_mode(param, &mode);
			if (err) {
				ret = err;
				goto out;
			}

			scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
			break;
		}

		if (!strcmp(param, "type")) {
			err = parse_mode(val, &mode);
			if (err) {
				ret = err;
				goto out;
			}

			scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
			break;
		}

		if (!strcmp(param, "page_index")) {
			err = parse_page_index(val, nr_pages, &lo, &hi);
			if (err) {
				ret = err;
				goto out;
			}

			scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
			continue;
		}

		if (!strcmp(param, "page_indexes")) {
			err = parse_page_indexes(val, nr_pages, &lo, &hi);
			if (err) {
				ret = err;
				goto out;
			}

			scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
			continue;
		}
	}

	err = zram_writeback_slots(zram, pp_ctl, wb_ctl);
	if (err)
		ret = err;

out:
	release_pp_ctl(zram, pp_ctl);
	release_wb_ctl(wb_ctl);

	return ret;
}
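
/*
 * Usage sketch (from user space; the device name zram0 is an example):
 *
 *	# write back all idle slots
 *	echo type=idle > /sys/block/zram0/writeback
 *
 *	# write back slots 1 to 20 (inclusive)
 *	echo page_indexes=1-20 > /sys/block/zram0/writeback
 *
 *	# legacy, valueless form of the mode parameter
 *	echo huge_idle > /sys/block/zram0/writeback
 */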

static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned int size;
	int ret, prio;
	void *src;

	slot_lock(zram, index);
	/* Since slot was unlocked we need to make sure it's still ZRAM_WB */
	if (!test_slot_flag(zram, index, ZRAM_WB)) {
		slot_unlock(zram, index);
		/* We read some stale data, zero it out */
		memset_page(page, 0, 0, PAGE_SIZE);
		return -EIO;
	}

	if (test_slot_flag(zram, index, ZRAM_HUGE)) {
		slot_unlock(zram, index);
		return 0;
	}

	size = get_slot_size(zram, index);
	prio = get_slot_comp_priority(zram, index);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	src = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size,
			       zstrm->local_copy);
	if (!ret)
		copy_page(src, zstrm->local_copy);
	kunmap_local(src);
	zcomp_stream_put(zstrm);
	slot_unlock(zram, index);

	return ret;
}

static void zram_deferred_decompress(struct work_struct *w)
{
	struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
	struct page *page = bio_first_page_all(req->bio);
	struct zram *zram = req->zram;
	u32 index = req->index;
	int ret;

	ret = decompress_bdev_page(zram, page, index);
	if (ret)
		req->parent->bi_status = BLK_STS_IOERR;

	/* Decrement parent's ->remaining */
	bio_endio(req->parent);
	bio_put(req->bio);
	kfree(req);
}

static void zram_async_read_endio(struct bio *bio)
{
	struct zram_rb_req *req = bio->bi_private;
	struct zram *zram = req->zram;

	if (bio->bi_status) {
		req->parent->bi_status = bio->bi_status;
		bio_endio(req->parent);
		bio_put(bio);
		kfree(req);
		return;
	}

	/*
	 * NOTE: zram_async_read_endio() is not exactly the right place for
	 * this. Ideally, we need to do it after the ZRAM_WB check, but this
	 * requires us to use the wq path even on systems that don't enable
	 * compressed writeback, because we cannot take the slot-lock in the
	 * current context.
	 *
	 * Keep the existing behavior for now.
	 */
	if (!zram->compressed_wb) {
		/* No decompression needed, complete the parent IO */
		bio_endio(req->parent);
		bio_put(bio);
		kfree(req);
		return;
	}

	/*
	 * zram decompression is sleepable, so we need to defer it to
	 * a preemptible context.
	 */
	INIT_WORK(&req->work, zram_deferred_decompress);
	queue_work(system_highpri_wq, &req->work);
}

static void read_from_bdev_async(struct zram *zram, struct page *page,
				 u32 index, unsigned long blk_idx,
				 struct bio *parent)
{
	struct zram_rb_req *req;
	struct bio *bio;

	req = kmalloc_obj(*req, GFP_NOIO);
	if (!req)
		return;

	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
	if (!bio) {
		kfree(req);
		return;
	}

	req->zram = zram;
	req->index = index;
	req->blk_idx = blk_idx;
	req->bio = bio;
	req->parent = parent;

	bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
	bio->bi_private = req;
	bio->bi_end_io = zram_async_read_endio;

	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_inc_remaining(parent);
	submit_bio(bio);
}

static void zram_sync_read(struct work_struct *w)
{
	struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
	__bio_add_page(&bio, req->page, PAGE_SIZE, 0);
	req->error = submit_bio_wait(&bio);
}

/*
 * The block layer wants one ->submit_bio to be active at a time, so if we
 * use chained IO with the parent IO in the same context, it's a deadlock.
 * To avoid that, use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index,
			       unsigned long blk_idx)
{
	struct zram_rb_req req;

	req.page = page;
	req.zram = zram;
	req.blk_idx = blk_idx;

	INIT_WORK_ONSTACK(&req.work, zram_sync_read);
	queue_work(system_dfl_wq, &req.work);
	flush_work(&req.work);
	destroy_work_on_stack(&req.work);

	if (req.error || !zram->compressed_wb)
		return req.error;

	return decompress_bdev_page(zram, page, index);
}

static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
			  unsigned long blk_idx, struct bio *parent)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (!parent) {
		if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
			return -EIO;
		return read_from_bdev_sync(zram, page, index, blk_idx);
	}
	read_from_bdev_async(zram, page, index, blk_idx, parent);
	return 0;
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct page *page, u32 index,
			  unsigned long blk_idx, struct bio *parent)
{
	return -EIO;
}

static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
{
}
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	guard(rwsem_read)(&zram->dev_lock);
	if (!init_done(zram)) {
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		slot_lock(zram, index);
		if (!slot_allocated(zram, index))
			goto next;

		copied = snprintf(kbuf + written, count,
			"%12zd %12u.%06d %c%c%c%c%c%c\n",
			index, zram->table[index].attr.ac_time, 0,
			test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
			get_slot_comp_priority(zram, index) ? 'r' : '.',
			test_slot_flag(zram, index,
				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');

		if (count <= copied) {
			slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		slot_unlock(zram, index);
		*ppos += 1;
	}

	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}
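
/*
 * Example output line (illustrative): one row per allocated slot, in the
 * form "index access-time flags". The sub-second part is always zero
 * (a literal 0 is passed for %06d), and the flag letters stand for
 * same-filled (s), written back (w), huge (h), idle (i), recompressed (r)
 * and incompressible (n):
 *
 *	         300           75.000000 .wh...
 */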

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
					       zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
			    zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif

static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	/* Do not free statically defined compression algorithms */
	if (zram->comp_algs[prio] != default_compressor)
		kfree(zram->comp_algs[prio]);

	zram->comp_algs[prio] = alg;
}

static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	char *compressor;
	size_t sz;

	sz = strlen(buf);
	if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
		return -E2BIG;

	compressor = kstrdup(buf, GFP_KERNEL);
	if (!compressor)
		return -ENOMEM;

	/* ignore trailing newline */
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor)) {
		kfree(compressor);
		return -EINVAL;
	}

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		kfree(compressor);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, compressor);
	return 0;
}

static void comp_params_reset(struct zram *zram, u32 prio)
{
	struct zcomp_params *params = &zram->params[prio];

	vfree(params->dict);
	params->level = ZCOMP_PARAM_NOT_SET;
	params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
	params->dict_sz = 0;
	params->dict = NULL;
}

static int comp_params_store(struct zram *zram, u32 prio, s32 level,
			     const char *dict_path,
			     struct deflate_params *deflate_params)
{
	ssize_t sz = 0;

	comp_params_reset(zram, prio);

	if (dict_path) {
		sz = kernel_read_file_from_path(dict_path, 0,
						&zram->params[prio].dict,
						INT_MAX,
						NULL,
						READING_POLICY);
		if (sz < 0)
			return -EINVAL;
	}

	zram->params[prio].dict_sz = sz;
	zram->params[prio].level = level;
	zram->params[prio].deflate.winbits = deflate_params->winbits;
	return 0;
}

static ssize_t algorithm_params_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
	char *args, *param, *val, *algo = NULL, *dict_path = NULL;
	struct deflate_params deflate_params;
	struct zram *zram = dev_to_zram(dev);
	int ret;

	deflate_params.winbits = ZCOMP_PARAM_NOT_SET;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "level")) {
			ret = kstrtoint(val, 10, &level);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "dict")) {
			dict_path = val;
			continue;
		}

		if (!strcmp(param, "deflate.winbits")) {
			ret = kstrtoint(val, 10, &deflate_params.winbits);
			if (ret)
				return ret;
			continue;
		}
	}

	/* Look up the priority by algorithm name */
	if (algo) {
		s32 p;

		prio = -EINVAL;
		for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) {
			if (!zram->comp_algs[p])
				continue;

			if (!strcmp(zram->comp_algs[p], algo)) {
				prio = p;
				break;
			}
		}
	}

	if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
	return ret ? ret : len;
}
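
/*
 * Usage sketch (from user space; the dictionary path is an example):
 *
 *	echo "priority=0 level=3" > /sys/block/zram0/algorithm_params
 *	echo "algo=zstd level=8 dict=/etc/zram/zstd.dict" > \
 *		/sys/block/zram0/algorithm_params
 */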

static ssize_t comp_algorithm_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz;

	guard(rwsem_read)(&zram->dev_lock);
	sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf,
				    size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
	return ret ? ret : len;
}

#ifdef CONFIG_ZRAM_MULTI_COMP
static ssize_t recomp_algorithm_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz = 0;
	u32 prio;

	guard(rwsem_read)(&zram->dev_lock);
	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
		sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
	}
	return sz;
}

static ssize_t recomp_algorithm_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int prio = ZRAM_SECONDARY_COMP;
	char *args, *param, *val;
	char *alg = NULL;
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "algo")) {
			alg = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}
	}

	if (!alg)
		return -EINVAL;

	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = __comp_algorithm_store(zram, prio, alg);
	return ret ? ret : len;
}
#endif

static ssize_t compact_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	guard(rwsem_read)(&zram->dev_lock);
	if (!init_done(zram))
		return -EINVAL;

	zs_compact(zram->mem_pool);

	return len;
}

static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	ret = sysfs_emit(buf,
			 "%8llu %8llu 0 %8llu\n",
			 (u64)atomic64_read(&zram->stats.failed_reads),
			 (u64)atomic64_read(&zram->stats.failed_writes),
			 (u64)atomic64_read(&zram->stats.notify_free));

	return ret;
}

static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	guard(rwsem_read)(&zram->dev_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = sysfs_emit(buf,
			 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			 orig_size << PAGE_SHIFT,
			 (u64)atomic64_read(&zram->stats.compr_data_size),
			 mem_used << PAGE_SHIFT,
			 zram->limit_pages << PAGE_SHIFT,
			 max_used << PAGE_SHIFT,
			 (u64)atomic64_read(&zram->stats.same_pages),
			 atomic_long_read(&pool_stats.pages_compacted),
			 (u64)atomic64_read(&zram->stats.huge_pages),
			 (u64)atomic64_read(&zram->stats.huge_pages_since));

	return ret;
}

static ssize_t debug_stat_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	guard(rwsem_read)(&zram->dev_lock);
	ret = sysfs_emit(buf,
			 "version: %d\n0 %8llu\n",
			 version,
			 (u64)atomic64_read(&zram->stats.miss_free));

	return ret;
}

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	if (!zram->table)
		return;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		slot_free(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
	zram->table = NULL;
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages, index;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		zram->table = NULL;
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);

	for (index = 0; index < num_pages; index++)
		slot_lock_init(zram, index);

	return true;
}

static void slot_free(struct zram *zram, u32 index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].attr.ac_time = 0;
#endif

	clear_slot_flag(zram, index, ZRAM_IDLE);
	clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
	set_slot_comp_priority(zram, index, 0);

	if (test_slot_flag(zram, index, ZRAM_HUGE)) {
		/*
		 * Writeback completion decrements ->huge_pages but keeps
		 * the ZRAM_HUGE flag for the deferred decompression path.
		 */
		if (!test_slot_flag(zram, index, ZRAM_WB))
			atomic64_dec(&zram->stats.huge_pages);
		clear_slot_flag(zram, index, ZRAM_HUGE);
	}

	if (test_slot_flag(zram, index, ZRAM_WB)) {
		clear_slot_flag(zram, index, ZRAM_WB);
		zram_release_bdev_block(zram, get_slot_handle(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear the same page flag.
	 */
	if (test_slot_flag(zram, index, ZRAM_SAME)) {
		clear_slot_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = get_slot_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(get_slot_size(zram, index),
		     &zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	set_slot_handle(zram, index, 0);
	set_slot_size(zram, index, 0);
}

static int read_same_filled_page(struct zram *zram, struct page *page,
				 u32 index)
{
	void *mem;

	mem = kmap_local_page(page);
	zram_fill_page(mem, PAGE_SIZE, get_slot_handle(zram, index));
	kunmap_local(mem);
	return 0;
}
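
/*
 * Illustrative note: for same-filled pages no zsmalloc object exists;
 * the slot handle stores the fill pattern itself. For example, a page of
 * all zeros is stored as handle == 0 with ZRAM_SAME set, and a page
 * filled with the repeating word 0xdeadbeef is stored as that value.
 * Reading the page back is a single memset_l() with the stored pattern.
 */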

static int read_incompressible_page(struct zram *zram, struct page *page,
				    u32 index)
{
	unsigned long handle;
	void *src, *dst;

	handle = get_slot_handle(zram, index);
	src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL);
	dst = kmap_local_page(page);
	copy_page(dst, src);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src);

	return 0;
}

static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	int ret, prio;

	handle = get_slot_handle(zram, index);
	size = get_slot_size(zram, index);
	prio = get_slot_comp_priority(zram, index);

	zstrm = zcomp_stream_get(zram->comps[prio]);
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	dst = kmap_local_page(page);
	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
	kunmap_local(dst);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	return ret;
}

#if defined CONFIG_ZRAM_WRITEBACK
static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src;

	handle = get_slot_handle(zram, index);
	size = get_slot_size(zram, index);

	/*
	 * We need to get a stream just for the ->local_copy buffer, in
	 * case the object spans two physical pages. No decompression
	 * takes place here, as we read raw compressed data.
	 */
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	memcpy_to_page(page, 0, src, size);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	return 0;
}
#endif

/*
 * Reads (decompresses if needed) a page from zspool (zsmalloc).
 * The corresponding ZRAM slot should be locked.
 */
static int read_from_zspool(struct zram *zram, struct page *page, u32 index)
{
	if (test_slot_flag(zram, index, ZRAM_SAME) ||
	    !get_slot_handle(zram, index))
		return read_same_filled_page(zram, page, index);

	if (!test_slot_flag(zram, index, ZRAM_HUGE))
		return read_compressed_page(zram, page, index);
	else
		return read_incompressible_page(zram, page, index);
}

static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	slot_lock(zram, index);
	if (!test_slot_flag(zram, index, ZRAM_WB)) {
		/* The slot should be locked throughout the function call */
		ret = read_from_zspool(zram, page, index);
		slot_unlock(zram, index);
	} else {
		unsigned long blk_idx = get_slot_handle(zram, index);

		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		slot_unlock(zram, index);
		ret = read_from_bdev(zram, page, index, blk_idx, parent);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

/*
 * Use a temporary buffer to decompress the page, as the decompressor
 * always expects a full page for the output.
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;
	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_read_partial(zram, bvec, index, offset);
	return zram_read_page(zram, bvec->bv_page, index, bio);
}
	 */
	handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle))
		return PTR_ERR((void *)handle);

	if (!zram_can_store_page(zram)) {
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	src = kmap_local_page(page);
	zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
	kunmap_local(src);

	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_HUGE);
	set_slot_handle(zram, index, handle);
	set_slot_size(zram, index, PAGE_SIZE);
	slot_unlock(zram, index);

	atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.huge_pages);
	atomic64_inc(&zram->stats.huge_pages_since);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

static int zram_write_page(struct zram *zram, struct page *page, u32 index)
{
	int ret = 0;
	unsigned long handle;
	unsigned int comp_len;
	void *mem;
	struct zcomp_strm *zstrm;
	unsigned long element;
	bool same_filled;

	mem = kmap_local_page(page);
	same_filled = page_same_filled(mem, &element);
	kunmap_local(mem);
	if (same_filled)
		return write_same_filled_page(zram, element, index);

	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	mem = kmap_local_page(page);
	ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
			     mem, &comp_len);
	kunmap_local(mem);

	if (unlikely(ret)) {
		zcomp_stream_put(zstrm);
		pr_err("Compression failed! err=%d\n", ret);
		return ret;
	}

	if (comp_len >= huge_class_size) {
		zcomp_stream_put(zstrm);
		return write_incompressible_page(zram, page, index);
	}

	handle = zs_malloc(zram->mem_pool, comp_len,
			   GFP_NOIO | __GFP_NOWARN |
			   __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle);
	}

	if (!zram_can_store_page(zram)) {
		zcomp_stream_put(zstrm);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
	zcomp_stream_put(zstrm);

	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_handle(zram, index, handle);
	set_slot_size(zram, index, comp_len);
	slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	atomic64_add(comp_len, &zram->stats.compr_data_size);

	return ret;
}

/*
 * This is a partial IO. Read the full page before writing the changes.
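 *
 * The read below and the write that follows each take the slot lock
 * independently, so the read-modify-write sequence as a whole is not
 * atomic with respect to other writers to the same index.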
 */
static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
				   u32 index, int offset, struct bio *bio)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = zram_read_page(zram, page, index, bio);
	if (!ret) {
		memcpy_from_bvec(page_address(page) + offset, bvec);
		ret = zram_write_page(zram, page, index);
	}
	__free_page(page);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
			   u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_write_partial(zram, bvec, index, offset, bio);
	return zram_write_page(zram, bvec->bv_page, index);
}

#ifdef CONFIG_ZRAM_MULTI_COMP
#define RECOMPRESS_IDLE		(1 << 0)
#define RECOMPRESS_HUGE		(1 << 1)

static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max,
				     struct zram_pp_ctl *ctl)
{
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index;

	for (index = 0; index < nr_pages; index++) {
		bool ok = true;

		slot_lock(zram, index);
		if (!slot_allocated(zram, index))
			goto next;

		if (mode & RECOMPRESS_IDLE &&
		    !test_slot_flag(zram, index, ZRAM_IDLE))
			goto next;

		if (mode & RECOMPRESS_HUGE &&
		    !test_slot_flag(zram, index, ZRAM_HUGE))
			goto next;

		if (test_slot_flag(zram, index, ZRAM_WB) ||
		    test_slot_flag(zram, index, ZRAM_SAME) ||
		    test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		/* Already compressed with the same or a higher priority */
		if (get_slot_comp_priority(zram, index) + 1 >= prio_max)
			goto next;

		ok = place_pp_slot(zram, ctl, index);
next:
		slot_unlock(zram, index);
		if (!ok)
			break;
	}

	return 0;
}

/*
 * This function reads the page (decompressing it, unless it's ZRAM_HUGE)
 * and then attempts to compress it using a compression algorithm of the
 * provided priority (which is potentially more effective).
 *
 * The corresponding ZRAM slot must be locked.
 */
static int recompress_slot(struct zram *zram, u32 index, struct page *page,
			   u64 *num_recomp_pages, u32 threshold, u32 prio,
			   u32 prio_max)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	void *src;
	int ret = 0;

	handle_old = get_slot_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = get_slot_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	/*
	 * We touched this entry, so mark it as non-IDLE. This ensures the
	 * IDLE flag doesn't survive recompression, which could otherwise
	 * make the entry an incorrect pick for a different post-processing
	 * type (e.g. writeback).
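	 *
	 * Clearing IDLE here also prevents a subsequent RECOMPRESS_IDLE
	 * scan from picking this slot again before it goes idle anew.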
	 */
	clear_slot_flag(zram, index, ZRAM_IDLE);

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);

	prio = max(prio, get_slot_comp_priority(zram, index) + 1);
	/*
	 * The recompression slot scan should not have selected slots that
	 * are already compressed with the same or a higher priority
	 * algorithm, but check just in case.
	 */
	if (prio >= prio_max)
		return 0;

	/*
	 * Iterate the secondary compression algorithms list (in order of
	 * priority) and try to recompress the page.
	 */
	for (; prio < prio_max; prio++) {
		if (!zram->comps[prio])
			continue;

		zstrm = zcomp_stream_get(zram->comps[prio]);
		src = kmap_local_page(page);
		ret = zcomp_compress(zram->comps[prio], zstrm,
				     src, &comp_len_new);
		kunmap_local(src);

		if (ret) {
			zcomp_stream_put(zstrm);
			zstrm = NULL;
			break;
		}

		class_index_new = zs_lookup_class_index(zram->mem_pool,
							comp_len_new);

		/* Continue until we make progress */
		if (class_index_new >= class_index_old ||
		    (threshold && comp_len_new >= threshold)) {
			zcomp_stream_put(zstrm);
			zstrm = NULL;
			continue;
		}

		/* Recompression was successful, so break out */
		break;
	}

	/*
	 * Decrement the limit (if set) on pages we can recompress, even
	 * when the current recompression was unsuccessful or did not
	 * shrink the page below the threshold, because we still spent
	 * resources on it.
	 */
	if (*num_recomp_pages)
		*num_recomp_pages -= 1;

	/* Compression error */
	if (ret)
		return ret;

	if (!zstrm) {
		/*
		 * Secondary algorithms failed to re-compress the page
		 * in a way that would save memory.
		 *
		 * Mark the object incompressible if the max-priority
		 * algorithm couldn't re-compress it.
		 */
		if (prio < zram->num_active_comps)
			return 0;
		set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/*
	 * We are holding the per-CPU stream mutex and the entry lock, so
	 * it's better to avoid direct reclaim. An allocation error is not
	 * fatal, since we still have the old object in the mem_pool.
	 *
	 * XXX: technically, the node we really want here is the node that
	 * holds the original compressed data. But that would require us to
	 * modify the zsmalloc API to return this information. For now, we
	 * will make do with the node of the page allocated for recompression.
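	 *
	 * GFP_NOIO additionally keeps this allocation from recursing
	 * into block I/O while the compression stream is held.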
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       GFP_NOIO | __GFP_NOWARN |
			       __GFP_HIGHMEM | __GFP_MOVABLE,
			       page_to_nid(page));
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle_new);
	}

	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zstrm);

	slot_free(zram, index);
	set_slot_handle(zram, index, handle_new);
	set_slot_size(zram, index, comp_len_new);
	set_slot_comp_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

static ssize_t recompress_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char *args, *param, *val, *algo = NULL;
	u64 num_recomp_pages = ULLONG_MAX;
	struct zram_pp_ctl *ctl = NULL;
	struct zram_pp_slot *pps;
	u32 mode = 0, threshold = 0;
	u32 prio, prio_max;
	struct page *page = NULL;
	ssize_t ret;

	prio = ZRAM_SECONDARY_COMP;
	prio_max = zram->num_active_comps;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "type")) {
			if (!strcmp(val, "idle"))
				mode = RECOMPRESS_IDLE;
			if (!strcmp(val, "huge"))
				mode = RECOMPRESS_HUGE;
			if (!strcmp(val, "huge_idle"))
				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
			continue;
		}

		if (!strcmp(param, "max_pages")) {
			/*
			 * Limit the number of entries (pages) we attempt to
			 * recompress.
			 */
			ret = kstrtoull(val, 10, &num_recomp_pages);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "threshold")) {
			/*
			 * Only objects equal to or greater in size than this
			 * watermark are re-compressed.
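			 *
			 * For example (the threshold value here is
			 * purely illustrative):
			 *
			 *   echo "type=idle threshold=860" > \
			 *		/sys/block/zram<id>/recompress
			 *
			 * skips every object that already compressed
			 * to fewer than 860 bytes.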
			 */
			ret = kstrtouint(val, 10, &threshold);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtouint(val, 10, &prio);
			if (ret)
				return ret;

			if (prio == ZRAM_PRIMARY_COMP)
				prio = ZRAM_SECONDARY_COMP;

			prio_max = prio + 1;
			continue;
		}
	}

	if (threshold >= huge_class_size)
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	if (!init_done(zram))
		return -EINVAL;

	if (algo) {
		bool found = false;

		for (; prio < ZRAM_MAX_COMPS; prio++) {
			if (!zram->comp_algs[prio])
				continue;

			if (!strcmp(zram->comp_algs[prio], algo)) {
				prio_max = prio + 1;
				found = true;
				break;
			}
		}

		if (!found) {
			ret = -EINVAL;
			goto out;
		}
	}

	prio_max = min(prio_max, (u32)zram->num_active_comps);
	if (prio >= prio_max) {
		ret = -EINVAL;
		goto out;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	ctl = init_pp_ctl();
	if (!ctl) {
		ret = -ENOMEM;
		goto out;
	}

	scan_slots_for_recompress(zram, mode, prio_max, ctl);

	ret = len;
	while ((pps = select_pp_slot(ctl))) {
		int err = 0;

		if (!num_recomp_pages)
			break;

		slot_lock(zram, pps->index);
		if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT))
			goto next;

		err = recompress_slot(zram, pps->index, page,
				      &num_recomp_pages, threshold,
				      prio, prio_max);
next:
		slot_unlock(zram, pps->index);
		release_pp_slot(zram, pps);

		if (err) {
			ret = err;
			break;
		}

		cond_resched();
	}

out:
	if (page)
		__free_page(page);
	release_pp_ctl(zram, ctl);
	return ret;
}
#endif

static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;

	/*
	 * zram manages data in physical block size units. Because the
	 * logical block size isn't identical to the physical block size
	 * on some architectures, we could get a discard request pointing
	 * to a specific offset within a certain physical block. Although
	 * we could handle such a request by reading that physical block,
	 * decompressing it, partially zeroing it, and then re-compressing
	 * and re-storing it, this isn't reasonable because our intent
	 * with a discard request is to save memory. So skipping this
	 * logical block is appropriate here.
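	 *
	 * In other words, only fully page-aligned, page-sized spans of
	 * the request are freed; a head or tail that covers part of a
	 * page is skipped.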
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			goto out;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		slot_lock(zram, index);
		slot_free(zram, index);
		slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}

out:
	bio_endio(bio);
}

static void zram_bio_read(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_reads);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}
		flush_dcache_page(bv.bv_page);

		slot_lock(zram, index);
		mark_slot_accessed(zram, index);
		slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

static void zram_bio_write(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_writes);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}

		slot_lock(zram, index);
		mark_slot_accessed(zram, index);
		slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

/*
 * Handler function for all zram I/O requests.
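 * Reads and writes are handled inline; discard and write-zeroes both
 * free the covered slots. Any other opcode is unexpected and is
 * completed with a warning.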
 */
static void zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_bdev->bd_disk->private_data;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		zram_bio_read(zram, bio);
		break;
	case REQ_OP_WRITE:
		zram_bio_write(zram, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, bio);
		break;
	default:
		WARN_ON_ONCE(1);
		bio_endio(bio);
	}
}

static void zram_slot_free_notify(struct block_device *bdev,
				  unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	slot_free(zram, index);
	slot_unlock(zram, index);
}

static void zram_comp_params_reset(struct zram *zram)
{
	u32 prio;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++)
		comp_params_reset(zram, prio);
}

static void zram_destroy_comps(struct zram *zram)
{
	u32 prio;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		struct zcomp *comp = zram->comps[prio];

		zram->comps[prio] = NULL;
		if (!comp)
			continue;
		zcomp_destroy(comp);
		zram->num_active_comps--;
	}

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		/* Do not free statically defined compression algorithms */
		if (zram->comp_algs[prio] != default_compressor)
			kfree(zram->comp_algs[prio]);
		zram->comp_algs[prio] = NULL;
	}

	zram_comp_params_reset(zram);
}

static void zram_reset_device(struct zram *zram)
{
	guard(rwsem_write)(&zram->dev_lock);

	zram->limit_pages = 0;

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	/* All I/O across all CPUs has completed, so it's safe to free */
	zram_meta_free(zram, zram->disksize);
	zram->disksize = 0;
	zram_destroy_comps(zram);
	memset(&zram->stats, 0, sizeof(zram->stats));
	reset_bdev(zram);

	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
}

static ssize_t disksize_store(struct device *dev, struct device_attribute *attr,
			      const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;
	u32 prio;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	guard(rwsem_write)(&zram->dev_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		return -EBUSY;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize))
		return -ENOMEM;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		comp = zcomp_create(zram->comp_algs[prio],
				    &zram->params[prio]);
		if (IS_ERR(comp)) {
			pr_err("Cannot initialise %s compressing backend\n",
			       zram->comp_algs[prio]);
			err = PTR_ERR(comp);
			goto out_free_comps;
		}

		zram->comps[prio] = comp;
		zram->num_active_comps++;
	}
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);

	return len;

out_free_comps:
	zram_destroy_comps(zram);
	zram_meta_free(zram, disksize);
	return err;
}

static ssize_t reset_store(struct device *dev,
			   struct device_attribute *attr, const char *buf,
			   size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct gendisk *disk;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	disk = zram->disk;

	mutex_lock(&disk->open_mutex);
	/* Do not reset an active or claimed device */
	if (disk_openers(disk) || zram->claim) {
		mutex_unlock(&disk->open_mutex);
		return -EBUSY;
	}

	/* From now on, nobody can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&disk->open_mutex);

	/* Make sure all pending I/O is finished */
	sync_blockdev(disk->part0);
	zram_reset_device(zram);

	mutex_lock(&disk->open_mutex);
	zram->claim = false;
	mutex_unlock(&disk->open_mutex);

	return len;
}

static int zram_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zram *zram = disk->private_data;

	WARN_ON(!mutex_is_locked(&disk->open_mutex));

	/* zram was claimed for reset, so fail the open request */
	if (zram->claim)
		return -EBUSY;
	return 0;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE,
};

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
static DEVICE_ATTR_RW(writeback_batch_size);
static DEVICE_ATTR_RW(compressed_writeback);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
static DEVICE_ATTR_WO(recompress);
#endif
static DEVICE_ATTR_WO(algorithm_params);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
	&dev_attr_writeback_batch_size.attr,
	&dev_attr_compressed_writeback.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
	&dev_attr_recomp_algorithm.attr,
	&dev_attr_recompress.attr,
#endif
	&dev_attr_algorithm_params.attr,
	NULL,
};

ATTRIBUTE_GROUPS(zram_disk);

/*
 * Allocate and initialize a new zram device.
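 * The caller must hold zram_index_mutex: the idr_alloc() below relies
 * on it for serialization.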
 * The function returns a '>= 0' device_id upon success, and a negative
 * value otherwise.
 */
static int zram_add(void)
{
	struct queue_limits lim = {
		.logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
		/*
		 * To ensure that we always get PAGE_SIZE aligned and
		 * n*PAGE_SIZE sized I/O requests.
		 */
		.physical_block_size = PAGE_SIZE,
		.io_min = PAGE_SIZE,
		.io_opt = PAGE_SIZE,
		.max_hw_discard_sectors = UINT_MAX,
		/*
		 * zram_bio_discard() will clear all logical blocks if the
		 * logical block size is identical to the physical block
		 * size (PAGE_SIZE). But if it is different, we skip
		 * discarding the parts of the request range that aren't
		 * aligned to the physical block size, so we can't ensure
		 * that all discarded logical blocks are zeroed.
		 */
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
		.max_write_zeroes_sectors = UINT_MAX,
#endif
		.features = BLK_FEAT_STABLE_WRITES |
			    BLK_FEAT_SYNCHRONOUS,
	};
	struct zram *zram;
	int ret, device_id;

	zram = kzalloc_obj(struct zram);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->dev_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	zram->wb_batch_size = 32;
	zram->compressed_wb = false;
#endif

	/* gendisk structure */
	zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
	if (IS_ERR(zram->disk)) {
		pr_err("Error allocating disk structure for device %d\n",
		       device_id);
		ret = PTR_ERR(zram->disk);
		goto out_free_idr;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->minors = 1;
	zram->disk->flags |= GENHD_FL_NO_PART;
	zram->disk->fops = &zram_devops;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
	zram_comp_params_reset(zram);
	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);

	/* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
	if (ret)
		goto out_cleanup_disk;

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_cleanup_disk:
	put_disk(zram->disk);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

static int zram_remove(struct zram *zram)
{
	bool claimed;

	mutex_lock(&zram->disk->open_mutex);
	if (disk_openers(zram->disk)) {
		mutex_unlock(&zram->disk->open_mutex);
		return -EBUSY;
	}

	claimed = zram->claim;
	if (!claimed)
		zram->claim = true;
	mutex_unlock(&zram->disk->open_mutex);

	zram_debugfs_unregister(zram);

	if (claimed) {
		/*
		 * If we were claimed by reset_store(), del_gendisk() will
		 * wait until reset_store() is done, so there is nothing
		 * to do here.
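		 * (The WARN_ON_ONCE() below relies on this: once
		 * del_gendisk() returns, no reset can still be in
		 * flight, so ->claim must be false again.)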
		 */
		;
	} else {
		/* Make sure all pending I/O is finished */
		sync_blockdev(zram->disk->part0);
		zram_reset_device(zram);
	}

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);

	/* del_gendisk() drains pending reset_store() calls */
	WARN_ON_ONCE(claimed && zram->claim);

	/*
	 * disksize_store() may be called in between zram_reset_device()
	 * and del_gendisk(), so run one last reset to avoid leaking
	 * anything allocated by disksize_store().
	 */
	zram_reset_device(zram);

	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add is not the usual read-only sysfs attribute, in the sense
 * that reading from this file does alter the state of your system: it
 * creates a new uninitialized zram device and returns that device's
 * device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(const struct class *class,
			    const struct class_attribute *attr,
			    char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return sysfs_emit(buf, "%d\n", ret);
}
/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);

static ssize_t hot_remove_store(const struct class *class,
				const struct class_attribute *attr,
				const char *buf,
				size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.class_groups	= zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	WARN_ON_ONCE(zram_remove(ptr));
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	struct zram_table_entry zram_te;
	int ret;

	BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8);

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");