/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define pr_fmt(fmt) "zram: " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>
#include <linux/kernel_read_file.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

#define ZRAM_MAX_ALGO_NAME_SZ	128

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;

static void slot_free(struct zram *zram, u32 index);
#define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)

static void slot_lock_init(struct zram *zram, u32 index)
{
	static struct lock_class_key __key;

	lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
			 &__key, 0);
}

/*
 * entry locking rules:
 *
 * 1) Lock is exclusive
 *
 * 2) lock() function can sleep waiting for the lock
 *
 * 3) Lock owner can sleep
 *
 * 4) Use TRY lock variant when in atomic context
 *    - must check the return value and handle locking failures
 */
static __must_check bool slot_trylock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
		mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
		lock_acquired(slot_dep_map(zram, index), _RET_IP_);
		return true;
	}

	return false;
}

static void slot_lock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
	wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
	lock_acquired(slot_dep_map(zram, index), _RET_IP_);
}

static void slot_unlock(struct zram *zram, u32 index)
{
	unsigned long *lock = &zram->table[index].__lock;

	mutex_release(slot_dep_map(zram, index), _RET_IP_);
	clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
}

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long get_slot_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void set_slot_handle(struct zram *zram, u32 index, unsigned long handle)
{
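	/*
	 * Note: ->handle is overloaded. Regular slots store a zsmalloc
	 * handle here, ZRAM_SAME slots store the fill pattern, and
	 * ZRAM_WB slots store the backing device block index.
	 */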
zram->table[index].handle = handle; 129 } 130 131 static bool test_slot_flag(struct zram *zram, u32 index, 132 enum zram_pageflags flag) 133 { 134 return zram->table[index].attr.flags & BIT(flag); 135 } 136 137 static void set_slot_flag(struct zram *zram, u32 index, 138 enum zram_pageflags flag) 139 { 140 zram->table[index].attr.flags |= BIT(flag); 141 } 142 143 static void clear_slot_flag(struct zram *zram, u32 index, 144 enum zram_pageflags flag) 145 { 146 zram->table[index].attr.flags &= ~BIT(flag); 147 } 148 149 static size_t get_slot_size(struct zram *zram, u32 index) 150 { 151 return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1); 152 } 153 154 static void set_slot_size(struct zram *zram, u32 index, size_t size) 155 { 156 unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT; 157 158 zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size; 159 } 160 161 static inline bool slot_allocated(struct zram *zram, u32 index) 162 { 163 return get_slot_size(zram, index) || 164 test_slot_flag(zram, index, ZRAM_SAME) || 165 test_slot_flag(zram, index, ZRAM_WB); 166 } 167 168 static inline void set_slot_comp_priority(struct zram *zram, u32 index, 169 u32 prio) 170 { 171 prio &= ZRAM_COMP_PRIORITY_MASK; 172 /* 173 * Clear previous priority value first, in case if we recompress 174 * further an already recompressed page 175 */ 176 zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK << 177 ZRAM_COMP_PRIORITY_BIT1); 178 zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); 179 } 180 181 static inline u32 get_slot_comp_priority(struct zram *zram, u32 index) 182 { 183 u32 prio = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1; 184 185 return prio & ZRAM_COMP_PRIORITY_MASK; 186 } 187 188 static void mark_slot_accessed(struct zram *zram, u32 index) 189 { 190 clear_slot_flag(zram, index, ZRAM_IDLE); 191 clear_slot_flag(zram, index, ZRAM_PP_SLOT); 192 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 193 zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds(); 194 #endif 195 } 196 197 static inline void update_used_max(struct zram *zram, const unsigned long pages) 198 { 199 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); 200 201 do { 202 if (cur_max >= pages) 203 return; 204 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, 205 &cur_max, pages)); 206 } 207 208 static bool zram_can_store_page(struct zram *zram) 209 { 210 unsigned long alloced_pages; 211 212 alloced_pages = zs_get_total_pages(zram->mem_pool); 213 update_used_max(zram, alloced_pages); 214 215 return !zram->limit_pages || alloced_pages <= zram->limit_pages; 216 } 217 218 #if PAGE_SIZE != 4096 219 static inline bool is_partial_io(struct bio_vec *bvec) 220 { 221 return bvec->bv_len != PAGE_SIZE; 222 } 223 #define ZRAM_PARTIAL_IO 1 224 #else 225 static inline bool is_partial_io(struct bio_vec *bvec) 226 { 227 return false; 228 } 229 #endif 230 231 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP 232 struct zram_pp_slot { 233 unsigned long index; 234 struct list_head entry; 235 }; 236 237 /* 238 * A post-processing bucket is, essentially, a size class, this defines 239 * the range (in bytes) of pp-slots sizes in particular bucket. 
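 * For example, with 4 KiB pages and a 64-byte range there are 65 buckets;
 * a slot whose compressed size is 700 bytes lands in bucket 700 / 64 = 10.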
240 */ 241 #define PP_BUCKET_SIZE_RANGE 64 242 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1) 243 244 struct zram_pp_ctl { 245 struct list_head pp_buckets[NUM_PP_BUCKETS]; 246 }; 247 248 static struct zram_pp_ctl *init_pp_ctl(void) 249 { 250 struct zram_pp_ctl *ctl; 251 u32 idx; 252 253 ctl = kmalloc(sizeof(*ctl), GFP_KERNEL); 254 if (!ctl) 255 return NULL; 256 257 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) 258 INIT_LIST_HEAD(&ctl->pp_buckets[idx]); 259 return ctl; 260 } 261 262 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps) 263 { 264 list_del_init(&pps->entry); 265 266 slot_lock(zram, pps->index); 267 clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT); 268 slot_unlock(zram, pps->index); 269 270 kfree(pps); 271 } 272 273 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl) 274 { 275 u32 idx; 276 277 if (!ctl) 278 return; 279 280 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) { 281 while (!list_empty(&ctl->pp_buckets[idx])) { 282 struct zram_pp_slot *pps; 283 284 pps = list_first_entry(&ctl->pp_buckets[idx], 285 struct zram_pp_slot, 286 entry); 287 release_pp_slot(zram, pps); 288 } 289 } 290 291 kfree(ctl); 292 } 293 294 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl, 295 u32 index) 296 { 297 struct zram_pp_slot *pps; 298 u32 bid; 299 300 pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN); 301 if (!pps) 302 return false; 303 304 INIT_LIST_HEAD(&pps->entry); 305 pps->index = index; 306 307 bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; 308 list_add(&pps->entry, &ctl->pp_buckets[bid]); 309 310 set_slot_flag(zram, pps->index, ZRAM_PP_SLOT); 311 return true; 312 } 313 314 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl) 315 { 316 struct zram_pp_slot *pps = NULL; 317 s32 idx = NUM_PP_BUCKETS - 1; 318 319 /* The higher the bucket id the more optimal slot post-processing is */ 320 while (idx >= 0) { 321 pps = list_first_entry_or_null(&ctl->pp_buckets[idx], 322 struct zram_pp_slot, 323 entry); 324 if (pps) 325 break; 326 327 idx--; 328 } 329 return pps; 330 } 331 #endif 332 333 static inline void zram_fill_page(void *ptr, unsigned long len, 334 unsigned long value) 335 { 336 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 337 memset_l(ptr, value, len / sizeof(unsigned long)); 338 } 339 340 static bool page_same_filled(void *ptr, unsigned long *element) 341 { 342 unsigned long *page; 343 unsigned long val; 344 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 345 346 page = (unsigned long *)ptr; 347 val = page[0]; 348 349 if (val != page[last_pos]) 350 return false; 351 352 for (pos = 1; pos < last_pos; pos++) { 353 if (val != page[pos]) 354 return false; 355 } 356 357 *element = val; 358 359 return true; 360 } 361 362 static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, 363 char *buf) 364 { 365 u32 val; 366 struct zram *zram = dev_to_zram(dev); 367 368 guard(rwsem_read)(&zram->dev_lock); 369 val = init_done(zram); 370 371 return sysfs_emit(buf, "%u\n", val); 372 } 373 374 static ssize_t disksize_show(struct device *dev, 375 struct device_attribute *attr, char *buf) 376 { 377 struct zram *zram = dev_to_zram(dev); 378 379 return sysfs_emit(buf, "%llu\n", zram->disksize); 380 } 381 382 static ssize_t mem_limit_store(struct device *dev, 383 struct device_attribute *attr, const char *buf, 384 size_t len) 385 { 386 u64 limit; 387 char *tmp; 388 struct zram *zram = dev_to_zram(dev); 389 390 limit = memparse(buf, &tmp); 391 if (buf == tmp) /* no chars 
parsed, invalid input */ 392 return -EINVAL; 393 394 guard(rwsem_write)(&zram->dev_lock); 395 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 396 397 return len; 398 } 399 400 static ssize_t mem_used_max_store(struct device *dev, 401 struct device_attribute *attr, 402 const char *buf, size_t len) 403 { 404 int err; 405 unsigned long val; 406 struct zram *zram = dev_to_zram(dev); 407 408 err = kstrtoul(buf, 10, &val); 409 if (err || val != 0) 410 return -EINVAL; 411 412 guard(rwsem_read)(&zram->dev_lock); 413 if (init_done(zram)) { 414 atomic_long_set(&zram->stats.max_used_pages, 415 zs_get_total_pages(zram->mem_pool)); 416 } 417 418 return len; 419 } 420 421 /* 422 * Mark all pages which are older than or equal to cutoff as IDLE. 423 * Callers should hold the zram init lock in read mode 424 */ 425 static void mark_idle(struct zram *zram, ktime_t cutoff) 426 { 427 int is_idle = 1; 428 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 429 int index; 430 431 for (index = 0; index < nr_pages; index++) { 432 /* 433 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no 434 * post-processing (recompress, writeback) happens to the 435 * ZRAM_SAME slot. 436 * 437 * And ZRAM_WB slots simply cannot be ZRAM_IDLE. 438 */ 439 slot_lock(zram, index); 440 if (!slot_allocated(zram, index) || 441 test_slot_flag(zram, index, ZRAM_WB) || 442 test_slot_flag(zram, index, ZRAM_SAME)) { 443 slot_unlock(zram, index); 444 continue; 445 } 446 447 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 448 is_idle = !cutoff || 449 ktime_after(cutoff, zram->table[index].attr.ac_time); 450 #endif 451 if (is_idle) 452 set_slot_flag(zram, index, ZRAM_IDLE); 453 else 454 clear_slot_flag(zram, index, ZRAM_IDLE); 455 slot_unlock(zram, index); 456 } 457 } 458 459 static ssize_t idle_store(struct device *dev, struct device_attribute *attr, 460 const char *buf, size_t len) 461 { 462 struct zram *zram = dev_to_zram(dev); 463 ktime_t cutoff = 0; 464 465 if (!sysfs_streq(buf, "all")) { 466 /* 467 * If it did not parse as 'all' try to treat it as an integer 468 * when we have memory tracking enabled. 469 */ 470 u32 age_sec; 471 472 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && 473 !kstrtouint(buf, 0, &age_sec)) 474 cutoff = ktime_sub((u32)ktime_get_boottime_seconds(), 475 age_sec); 476 else 477 return -EINVAL; 478 } 479 480 guard(rwsem_read)(&zram->dev_lock); 481 if (!init_done(zram)) 482 return -EINVAL; 483 484 /* 485 * A cutoff of 0 marks everything as idle, this is the 486 * "all" behavior. 
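	 * For example, "echo all > /sys/block/zramX/idle" marks every
	 * allocated slot idle, while "echo 3600 > idle" (only valid with
	 * CONFIG_ZRAM_TRACK_ENTRY_ACTIME) marks slots that have not been
	 * accessed for roughly an hour or more.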
487 */ 488 mark_idle(zram, cutoff); 489 return len; 490 } 491 492 #ifdef CONFIG_ZRAM_WRITEBACK 493 #define INVALID_BDEV_BLOCK (~0UL) 494 495 static int read_from_zspool_raw(struct zram *zram, struct page *page, 496 u32 index); 497 static int read_from_zspool(struct zram *zram, struct page *page, u32 index); 498 499 struct zram_wb_ctl { 500 /* idle list is accessed only by the writeback task, no concurency */ 501 struct list_head idle_reqs; 502 /* done list is accessed concurrently, protect by done_lock */ 503 struct list_head done_reqs; 504 wait_queue_head_t done_wait; 505 spinlock_t done_lock; 506 atomic_t num_inflight; 507 }; 508 509 struct zram_wb_req { 510 unsigned long blk_idx; 511 struct page *page; 512 struct zram_pp_slot *pps; 513 struct bio_vec bio_vec; 514 struct bio bio; 515 516 struct list_head entry; 517 }; 518 519 struct zram_rb_req { 520 struct work_struct work; 521 struct zram *zram; 522 struct page *page; 523 /* The read bio for backing device */ 524 struct bio *bio; 525 unsigned long blk_idx; 526 union { 527 /* The original bio to complete (async read) */ 528 struct bio *parent; 529 /* error status (sync read) */ 530 int error; 531 }; 532 u32 index; 533 }; 534 535 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 536 static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr, 537 char *buf) 538 { 539 struct zram *zram = dev_to_zram(dev); 540 ssize_t ret; 541 542 guard(rwsem_read)(&zram->dev_lock); 543 ret = sysfs_emit(buf, 544 "%8llu %8llu %8llu\n", 545 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 546 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), 547 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); 548 549 return ret; 550 } 551 552 static ssize_t writeback_compressed_store(struct device *dev, 553 struct device_attribute *attr, 554 const char *buf, size_t len) 555 { 556 struct zram *zram = dev_to_zram(dev); 557 bool val; 558 559 if (kstrtobool(buf, &val)) 560 return -EINVAL; 561 562 guard(rwsem_write)(&zram->dev_lock); 563 if (init_done(zram)) { 564 return -EBUSY; 565 } 566 567 zram->wb_compressed = val; 568 569 return len; 570 } 571 572 static ssize_t writeback_compressed_show(struct device *dev, 573 struct device_attribute *attr, 574 char *buf) 575 { 576 bool val; 577 struct zram *zram = dev_to_zram(dev); 578 579 guard(rwsem_read)(&zram->dev_lock); 580 val = zram->wb_compressed; 581 582 return sysfs_emit(buf, "%d\n", val); 583 } 584 585 static ssize_t writeback_limit_enable_store(struct device *dev, 586 struct device_attribute *attr, 587 const char *buf, size_t len) 588 { 589 struct zram *zram = dev_to_zram(dev); 590 u64 val; 591 592 if (kstrtoull(buf, 10, &val)) 593 return -EINVAL; 594 595 guard(rwsem_write)(&zram->dev_lock); 596 zram->wb_limit_enable = val; 597 598 return len; 599 } 600 601 static ssize_t writeback_limit_enable_show(struct device *dev, 602 struct device_attribute *attr, 603 char *buf) 604 { 605 bool val; 606 struct zram *zram = dev_to_zram(dev); 607 608 guard(rwsem_read)(&zram->dev_lock); 609 val = zram->wb_limit_enable; 610 611 return sysfs_emit(buf, "%d\n", val); 612 } 613 614 static ssize_t writeback_limit_store(struct device *dev, 615 struct device_attribute *attr, 616 const char *buf, size_t len) 617 { 618 struct zram *zram = dev_to_zram(dev); 619 u64 val; 620 621 if (kstrtoull(buf, 10, &val)) 622 return -EINVAL; 623 624 /* 625 * When the page size is greater than 4KB, if bd_wb_limit is set to 626 * a value that is not page - size aligned, it will cause value 627 * wrapping. 
For example, when the page size is set to 16KB and 628 * bd_wb_limit is set to 3, a single write - back operation will 629 * cause bd_wb_limit to become -1. Even more terrifying is that 630 * bd_wb_limit is an unsigned number. 631 */ 632 val = rounddown(val, PAGE_SIZE / 4096); 633 634 guard(rwsem_write)(&zram->dev_lock); 635 zram->bd_wb_limit = val; 636 637 return len; 638 } 639 640 static ssize_t writeback_limit_show(struct device *dev, 641 struct device_attribute *attr, char *buf) 642 { 643 u64 val; 644 struct zram *zram = dev_to_zram(dev); 645 646 guard(rwsem_read)(&zram->dev_lock); 647 val = zram->bd_wb_limit; 648 649 return sysfs_emit(buf, "%llu\n", val); 650 } 651 652 static ssize_t writeback_batch_size_store(struct device *dev, 653 struct device_attribute *attr, 654 const char *buf, size_t len) 655 { 656 struct zram *zram = dev_to_zram(dev); 657 u32 val; 658 659 if (kstrtouint(buf, 10, &val)) 660 return -EINVAL; 661 662 if (!val) 663 return -EINVAL; 664 665 guard(rwsem_write)(&zram->dev_lock); 666 zram->wb_batch_size = val; 667 668 return len; 669 } 670 671 static ssize_t writeback_batch_size_show(struct device *dev, 672 struct device_attribute *attr, 673 char *buf) 674 { 675 u32 val; 676 struct zram *zram = dev_to_zram(dev); 677 678 guard(rwsem_read)(&zram->dev_lock); 679 val = zram->wb_batch_size; 680 681 return sysfs_emit(buf, "%u\n", val); 682 } 683 684 static void reset_bdev(struct zram *zram) 685 { 686 if (!zram->backing_dev) 687 return; 688 689 /* hope filp_close flush all of IO */ 690 filp_close(zram->backing_dev, NULL); 691 zram->backing_dev = NULL; 692 zram->bdev = NULL; 693 zram->disk->fops = &zram_devops; 694 kvfree(zram->bitmap); 695 zram->bitmap = NULL; 696 } 697 698 static ssize_t backing_dev_show(struct device *dev, 699 struct device_attribute *attr, char *buf) 700 { 701 struct file *file; 702 struct zram *zram = dev_to_zram(dev); 703 char *p; 704 ssize_t ret; 705 706 guard(rwsem_read)(&zram->dev_lock); 707 file = zram->backing_dev; 708 if (!file) { 709 memcpy(buf, "none\n", 5); 710 return 5; 711 } 712 713 p = file_path(file, buf, PAGE_SIZE - 1); 714 if (IS_ERR(p)) 715 return PTR_ERR(p); 716 717 ret = strlen(p); 718 memmove(buf, p, ret); 719 buf[ret++] = '\n'; 720 return ret; 721 } 722 723 static ssize_t backing_dev_store(struct device *dev, 724 struct device_attribute *attr, const char *buf, 725 size_t len) 726 { 727 char *file_name; 728 size_t sz; 729 struct file *backing_dev = NULL; 730 struct inode *inode; 731 unsigned int bitmap_sz; 732 unsigned long nr_pages, *bitmap = NULL; 733 int err; 734 struct zram *zram = dev_to_zram(dev); 735 736 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 737 if (!file_name) 738 return -ENOMEM; 739 740 guard(rwsem_write)(&zram->dev_lock); 741 if (init_done(zram)) { 742 pr_info("Can't setup backing device for initialized device\n"); 743 err = -EBUSY; 744 goto out; 745 } 746 747 strscpy(file_name, buf, PATH_MAX); 748 /* ignore trailing newline */ 749 sz = strlen(file_name); 750 if (sz > 0 && file_name[sz - 1] == '\n') 751 file_name[sz - 1] = 0x00; 752 753 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0); 754 if (IS_ERR(backing_dev)) { 755 err = PTR_ERR(backing_dev); 756 backing_dev = NULL; 757 goto out; 758 } 759 760 inode = backing_dev->f_mapping->host; 761 762 /* Support only block device in this moment */ 763 if (!S_ISBLK(inode->i_mode)) { 764 err = -ENOTBLK; 765 goto out; 766 } 767 768 nr_pages = i_size_read(inode) >> PAGE_SHIFT; 769 /* Refuse to use zero sized device (also prevents self reference) */ 770 if 
(!nr_pages) { 771 err = -EINVAL; 772 goto out; 773 } 774 775 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 776 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 777 if (!bitmap) { 778 err = -ENOMEM; 779 goto out; 780 } 781 782 reset_bdev(zram); 783 784 zram->bdev = I_BDEV(inode); 785 zram->backing_dev = backing_dev; 786 zram->bitmap = bitmap; 787 zram->nr_pages = nr_pages; 788 789 pr_info("setup backing device %s\n", file_name); 790 kfree(file_name); 791 792 return len; 793 out: 794 kvfree(bitmap); 795 796 if (backing_dev) 797 filp_close(backing_dev, NULL); 798 799 kfree(file_name); 800 801 return err; 802 } 803 804 static unsigned long zram_reserve_bdev_block(struct zram *zram) 805 { 806 unsigned long blk_idx; 807 808 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0); 809 if (blk_idx == zram->nr_pages) 810 return INVALID_BDEV_BLOCK; 811 812 set_bit(blk_idx, zram->bitmap); 813 atomic64_inc(&zram->stats.bd_count); 814 return blk_idx; 815 } 816 817 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) 818 { 819 int was_set; 820 821 was_set = test_and_clear_bit(blk_idx, zram->bitmap); 822 WARN_ON_ONCE(!was_set); 823 atomic64_dec(&zram->stats.bd_count); 824 } 825 826 static void release_wb_req(struct zram_wb_req *req) 827 { 828 __free_page(req->page); 829 kfree(req); 830 } 831 832 static void release_wb_ctl(struct zram_wb_ctl *wb_ctl) 833 { 834 if (!wb_ctl) 835 return; 836 837 /* We should never have inflight requests at this point */ 838 WARN_ON(atomic_read(&wb_ctl->num_inflight)); 839 WARN_ON(!list_empty(&wb_ctl->done_reqs)); 840 841 while (!list_empty(&wb_ctl->idle_reqs)) { 842 struct zram_wb_req *req; 843 844 req = list_first_entry(&wb_ctl->idle_reqs, 845 struct zram_wb_req, entry); 846 list_del(&req->entry); 847 release_wb_req(req); 848 } 849 850 kfree(wb_ctl); 851 } 852 853 static struct zram_wb_ctl *init_wb_ctl(struct zram *zram) 854 { 855 struct zram_wb_ctl *wb_ctl; 856 int i; 857 858 wb_ctl = kmalloc(sizeof(*wb_ctl), GFP_KERNEL); 859 if (!wb_ctl) 860 return NULL; 861 862 INIT_LIST_HEAD(&wb_ctl->idle_reqs); 863 INIT_LIST_HEAD(&wb_ctl->done_reqs); 864 atomic_set(&wb_ctl->num_inflight, 0); 865 init_waitqueue_head(&wb_ctl->done_wait); 866 spin_lock_init(&wb_ctl->done_lock); 867 868 for (i = 0; i < zram->wb_batch_size; i++) { 869 struct zram_wb_req *req; 870 871 /* 872 * This is fatal condition only if we couldn't allocate 873 * any requests at all. Otherwise we just work with the 874 * requests that we have successfully allocated, so that 875 * writeback can still proceed, even if there is only one 876 * request on the idle list. 
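		 * zram_writeback_slots() copes with a short list: when no
		 * idle request is available it waits on done_wait and
		 * recycles completed requests before submitting the next one.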
877 */ 878 req = kzalloc(sizeof(*req), GFP_KERNEL | __GFP_NOWARN); 879 if (!req) 880 break; 881 882 req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN); 883 if (!req->page) { 884 kfree(req); 885 break; 886 } 887 888 list_add(&req->entry, &wb_ctl->idle_reqs); 889 } 890 891 /* We couldn't allocate any requests, so writeabck is not possible */ 892 if (list_empty(&wb_ctl->idle_reqs)) 893 goto release_wb_ctl; 894 895 return wb_ctl; 896 897 release_wb_ctl: 898 release_wb_ctl(wb_ctl); 899 return NULL; 900 } 901 902 static void zram_account_writeback_rollback(struct zram *zram) 903 { 904 lockdep_assert_held_write(&zram->dev_lock); 905 906 if (zram->wb_limit_enable) 907 zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12); 908 } 909 910 static void zram_account_writeback_submit(struct zram *zram) 911 { 912 lockdep_assert_held_write(&zram->dev_lock); 913 914 if (zram->wb_limit_enable && zram->bd_wb_limit > 0) 915 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); 916 } 917 918 static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) 919 { 920 u32 size, index = req->pps->index; 921 int err, prio; 922 bool huge; 923 924 err = blk_status_to_errno(req->bio.bi_status); 925 if (err) { 926 /* 927 * Failed wb requests should not be accounted in wb_limit 928 * (if enabled). 929 */ 930 zram_account_writeback_rollback(zram); 931 zram_release_bdev_block(zram, req->blk_idx); 932 return err; 933 } 934 935 atomic64_inc(&zram->stats.bd_writes); 936 slot_lock(zram, index); 937 /* 938 * We release slot lock during writeback so slot can change under us: 939 * slot_free() or slot_free() and zram_write_page(). In both cases 940 * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can 941 * set ZRAM_PP_SLOT on such slots until current post-processing 942 * finishes. 943 */ 944 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) { 945 zram_release_bdev_block(zram, req->blk_idx); 946 goto out; 947 } 948 949 if (zram->wb_compressed) { 950 /* 951 * ZRAM_WB slots get freed, we need to preserve data required 952 * for read decompression. 953 */ 954 size = get_slot_size(zram, index); 955 prio = get_slot_comp_priority(zram, index); 956 huge = test_slot_flag(zram, index, ZRAM_HUGE); 957 } 958 959 slot_free(zram, index); 960 set_slot_flag(zram, index, ZRAM_WB); 961 set_slot_handle(zram, index, req->blk_idx); 962 963 if (zram->wb_compressed) { 964 if (huge) 965 set_slot_flag(zram, index, ZRAM_HUGE); 966 set_slot_size(zram, index, size); 967 set_slot_comp_priority(zram, index, prio); 968 } 969 970 atomic64_inc(&zram->stats.pages_stored); 971 972 out: 973 slot_unlock(zram, index); 974 return 0; 975 } 976 977 static void zram_writeback_endio(struct bio *bio) 978 { 979 struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio); 980 struct zram_wb_ctl *wb_ctl = bio->bi_private; 981 unsigned long flags; 982 983 spin_lock_irqsave(&wb_ctl->done_lock, flags); 984 list_add(&req->entry, &wb_ctl->done_reqs); 985 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 986 987 wake_up(&wb_ctl->done_wait); 988 } 989 990 static void zram_submit_wb_request(struct zram *zram, 991 struct zram_wb_ctl *wb_ctl, 992 struct zram_wb_req *req) 993 { 994 /* 995 * wb_limit (if enabled) should be adjusted before submission, 996 * so that we don't over-submit. 
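	 * If the bio later fails, zram_writeback_complete() returns the
	 * budget via zram_account_writeback_rollback().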
997 */ 998 zram_account_writeback_submit(zram); 999 atomic_inc(&wb_ctl->num_inflight); 1000 req->bio.bi_private = wb_ctl; 1001 submit_bio(&req->bio); 1002 } 1003 1004 static int zram_complete_done_reqs(struct zram *zram, 1005 struct zram_wb_ctl *wb_ctl) 1006 { 1007 struct zram_wb_req *req; 1008 unsigned long flags; 1009 int ret = 0, err; 1010 1011 while (atomic_read(&wb_ctl->num_inflight) > 0) { 1012 spin_lock_irqsave(&wb_ctl->done_lock, flags); 1013 req = list_first_entry_or_null(&wb_ctl->done_reqs, 1014 struct zram_wb_req, entry); 1015 if (req) 1016 list_del(&req->entry); 1017 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 1018 1019 /* ->num_inflight > 0 doesn't mean we have done requests */ 1020 if (!req) 1021 break; 1022 1023 err = zram_writeback_complete(zram, req); 1024 if (err) 1025 ret = err; 1026 1027 atomic_dec(&wb_ctl->num_inflight); 1028 release_pp_slot(zram, req->pps); 1029 req->pps = NULL; 1030 1031 list_add(&req->entry, &wb_ctl->idle_reqs); 1032 } 1033 1034 return ret; 1035 } 1036 1037 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl) 1038 { 1039 struct zram_wb_req *req; 1040 1041 req = list_first_entry_or_null(&wb_ctl->idle_reqs, 1042 struct zram_wb_req, entry); 1043 if (req) 1044 list_del(&req->entry); 1045 return req; 1046 } 1047 1048 static int zram_writeback_slots(struct zram *zram, 1049 struct zram_pp_ctl *ctl, 1050 struct zram_wb_ctl *wb_ctl) 1051 { 1052 unsigned long blk_idx = INVALID_BDEV_BLOCK; 1053 struct zram_wb_req *req = NULL; 1054 struct zram_pp_slot *pps; 1055 int ret = 0, err = 0; 1056 u32 index = 0; 1057 1058 while ((pps = select_pp_slot(ctl))) { 1059 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 1060 ret = -EIO; 1061 break; 1062 } 1063 1064 while (!req) { 1065 req = zram_select_idle_req(wb_ctl); 1066 if (req) 1067 break; 1068 1069 wait_event(wb_ctl->done_wait, 1070 !list_empty(&wb_ctl->done_reqs)); 1071 1072 err = zram_complete_done_reqs(zram, wb_ctl); 1073 /* 1074 * BIO errors are not fatal, we continue and simply 1075 * attempt to writeback the remaining objects (pages). 1076 * At the same time we need to signal user-space that 1077 * some writes (at least one, but also could be all of 1078 * them) were not successful and we do so by returning 1079 * the most recent BIO error. 1080 */ 1081 if (err) 1082 ret = err; 1083 } 1084 1085 if (blk_idx == INVALID_BDEV_BLOCK) { 1086 blk_idx = zram_reserve_bdev_block(zram); 1087 if (blk_idx == INVALID_BDEV_BLOCK) { 1088 ret = -ENOSPC; 1089 break; 1090 } 1091 } 1092 1093 index = pps->index; 1094 slot_lock(zram, index); 1095 /* 1096 * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so 1097 * slots can change in the meantime. If slots are accessed or 1098 * freed they lose ZRAM_PP_SLOT flag and hence we don't 1099 * post-process them. 1100 */ 1101 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) 1102 goto next; 1103 if (zram->wb_compressed) 1104 err = read_from_zspool_raw(zram, req->page, index); 1105 else 1106 err = read_from_zspool(zram, req->page, index); 1107 if (err) 1108 goto next; 1109 slot_unlock(zram, index); 1110 1111 /* 1112 * From now on pp-slot is owned by the req, remove it from 1113 * its pp bucket. 
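		 * release_pp_slot() for it runs later, from
		 * zram_complete_done_reqs(), once the bio has finished.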
1114 */ 1115 list_del_init(&pps->entry); 1116 1117 req->blk_idx = blk_idx; 1118 req->pps = pps; 1119 bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE); 1120 req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1121 req->bio.bi_end_io = zram_writeback_endio; 1122 __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0); 1123 1124 zram_submit_wb_request(zram, wb_ctl, req); 1125 blk_idx = INVALID_BDEV_BLOCK; 1126 req = NULL; 1127 cond_resched(); 1128 continue; 1129 1130 next: 1131 slot_unlock(zram, index); 1132 release_pp_slot(zram, pps); 1133 } 1134 1135 /* 1136 * Selected idle req, but never submitted it due to some error or 1137 * wb limit. 1138 */ 1139 if (req) 1140 release_wb_req(req); 1141 1142 while (atomic_read(&wb_ctl->num_inflight) > 0) { 1143 wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs)); 1144 err = zram_complete_done_reqs(zram, wb_ctl); 1145 if (err) 1146 ret = err; 1147 } 1148 1149 return ret; 1150 } 1151 1152 #define PAGE_WRITEBACK 0 1153 #define HUGE_WRITEBACK (1 << 0) 1154 #define IDLE_WRITEBACK (1 << 1) 1155 #define INCOMPRESSIBLE_WRITEBACK (1 << 2) 1156 1157 static int parse_page_index(char *val, unsigned long nr_pages, 1158 unsigned long *lo, unsigned long *hi) 1159 { 1160 int ret; 1161 1162 ret = kstrtoul(val, 10, lo); 1163 if (ret) 1164 return ret; 1165 if (*lo >= nr_pages) 1166 return -ERANGE; 1167 *hi = *lo + 1; 1168 return 0; 1169 } 1170 1171 static int parse_page_indexes(char *val, unsigned long nr_pages, 1172 unsigned long *lo, unsigned long *hi) 1173 { 1174 char *delim; 1175 int ret; 1176 1177 delim = strchr(val, '-'); 1178 if (!delim) 1179 return -EINVAL; 1180 1181 *delim = 0x00; 1182 ret = kstrtoul(val, 10, lo); 1183 if (ret) 1184 return ret; 1185 if (*lo >= nr_pages) 1186 return -ERANGE; 1187 1188 ret = kstrtoul(delim + 1, 10, hi); 1189 if (ret) 1190 return ret; 1191 if (*hi >= nr_pages || *lo > *hi) 1192 return -ERANGE; 1193 *hi += 1; 1194 return 0; 1195 } 1196 1197 static int parse_mode(char *val, u32 *mode) 1198 { 1199 *mode = 0; 1200 1201 if (!strcmp(val, "idle")) 1202 *mode = IDLE_WRITEBACK; 1203 if (!strcmp(val, "huge")) 1204 *mode = HUGE_WRITEBACK; 1205 if (!strcmp(val, "huge_idle")) 1206 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK; 1207 if (!strcmp(val, "incompressible")) 1208 *mode = INCOMPRESSIBLE_WRITEBACK; 1209 1210 if (*mode == 0) 1211 return -EINVAL; 1212 return 0; 1213 } 1214 1215 static int scan_slots_for_writeback(struct zram *zram, u32 mode, 1216 unsigned long lo, unsigned long hi, 1217 struct zram_pp_ctl *ctl) 1218 { 1219 u32 index = lo; 1220 1221 while (index < hi) { 1222 bool ok = true; 1223 1224 slot_lock(zram, index); 1225 if (!slot_allocated(zram, index)) 1226 goto next; 1227 1228 if (test_slot_flag(zram, index, ZRAM_WB) || 1229 test_slot_flag(zram, index, ZRAM_SAME)) 1230 goto next; 1231 1232 if (mode & IDLE_WRITEBACK && 1233 !test_slot_flag(zram, index, ZRAM_IDLE)) 1234 goto next; 1235 if (mode & HUGE_WRITEBACK && 1236 !test_slot_flag(zram, index, ZRAM_HUGE)) 1237 goto next; 1238 if (mode & INCOMPRESSIBLE_WRITEBACK && 1239 !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 1240 goto next; 1241 1242 ok = place_pp_slot(zram, ctl, index); 1243 next: 1244 slot_unlock(zram, index); 1245 if (!ok) 1246 break; 1247 index++; 1248 } 1249 1250 return 0; 1251 } 1252 1253 static ssize_t writeback_store(struct device *dev, 1254 struct device_attribute *attr, 1255 const char *buf, size_t len) 1256 { 1257 struct zram *zram = dev_to_zram(dev); 1258 u64 nr_pages = zram->disksize >> PAGE_SHIFT; 1259 unsigned long lo = 0, hi = 
nr_pages; 1260 struct zram_pp_ctl *pp_ctl = NULL; 1261 struct zram_wb_ctl *wb_ctl = NULL; 1262 char *args, *param, *val; 1263 ssize_t ret = len; 1264 int err, mode = 0; 1265 1266 guard(rwsem_write)(&zram->dev_lock); 1267 if (!init_done(zram)) 1268 return -EINVAL; 1269 1270 if (!zram->backing_dev) 1271 return -ENODEV; 1272 1273 pp_ctl = init_pp_ctl(); 1274 if (!pp_ctl) 1275 return -ENOMEM; 1276 1277 wb_ctl = init_wb_ctl(zram); 1278 if (!wb_ctl) { 1279 ret = -ENOMEM; 1280 goto out; 1281 } 1282 1283 args = skip_spaces(buf); 1284 while (*args) { 1285 args = next_arg(args, ¶m, &val); 1286 1287 /* 1288 * Workaround to support the old writeback interface. 1289 * 1290 * The old writeback interface has a minor inconsistency and 1291 * requires key=value only for page_index parameter, while the 1292 * writeback mode is a valueless parameter. 1293 * 1294 * This is not the case anymore and now all parameters are 1295 * required to have values, however, we need to support the 1296 * legacy writeback interface format so we check if we can 1297 * recognize a valueless parameter as the (legacy) writeback 1298 * mode. 1299 */ 1300 if (!val || !*val) { 1301 err = parse_mode(param, &mode); 1302 if (err) { 1303 ret = err; 1304 goto out; 1305 } 1306 1307 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1308 break; 1309 } 1310 1311 if (!strcmp(param, "type")) { 1312 err = parse_mode(val, &mode); 1313 if (err) { 1314 ret = err; 1315 goto out; 1316 } 1317 1318 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1319 break; 1320 } 1321 1322 if (!strcmp(param, "page_index")) { 1323 err = parse_page_index(val, nr_pages, &lo, &hi); 1324 if (err) { 1325 ret = err; 1326 goto out; 1327 } 1328 1329 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1330 continue; 1331 } 1332 1333 if (!strcmp(param, "page_indexes")) { 1334 err = parse_page_indexes(val, nr_pages, &lo, &hi); 1335 if (err) { 1336 ret = err; 1337 goto out; 1338 } 1339 1340 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1341 continue; 1342 } 1343 } 1344 1345 err = zram_writeback_slots(zram, pp_ctl, wb_ctl); 1346 if (err) 1347 ret = err; 1348 1349 out: 1350 release_pp_ctl(zram, pp_ctl); 1351 release_wb_ctl(wb_ctl); 1352 1353 return ret; 1354 } 1355 1356 static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index) 1357 { 1358 struct zcomp_strm *zstrm; 1359 unsigned int size; 1360 int ret, prio; 1361 void *src; 1362 1363 slot_lock(zram, index); 1364 /* Since slot was unlocked we need to make sure it's still ZRAM_WB */ 1365 if (!test_slot_flag(zram, index, ZRAM_WB)) { 1366 slot_unlock(zram, index); 1367 /* We read some stale data, zero it out */ 1368 memset_page(page, 0, 0, PAGE_SIZE); 1369 return -EIO; 1370 } 1371 1372 if (test_slot_flag(zram, index, ZRAM_HUGE)) { 1373 slot_unlock(zram, index); 1374 return 0; 1375 } 1376 1377 size = get_slot_size(zram, index); 1378 prio = get_slot_comp_priority(zram, index); 1379 1380 zstrm = zcomp_stream_get(zram->comps[prio]); 1381 src = kmap_local_page(page); 1382 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, 1383 zstrm->local_copy); 1384 if (!ret) 1385 copy_page(src, zstrm->local_copy); 1386 kunmap_local(src); 1387 zcomp_stream_put(zstrm); 1388 slot_unlock(zram, index); 1389 1390 return ret; 1391 } 1392 1393 static void zram_deferred_decompress(struct work_struct *w) 1394 { 1395 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1396 struct page *page = bio_first_page_all(req->bio); 1397 struct zram *zram = req->zram; 1398 u32 index = req->index; 1399 int 
ret;

	ret = decompress_bdev_page(zram, page, index);
	if (ret)
		req->parent->bi_status = BLK_STS_IOERR;

	/* Decrement parent's ->remaining */
	bio_endio(req->parent);
	bio_put(req->bio);
	kfree(req);
}

static void zram_async_read_endio(struct bio *bio)
{
	struct zram_rb_req *req = bio->bi_private;
	struct zram *zram = req->zram;

	if (bio->bi_status) {
		req->parent->bi_status = bio->bi_status;
		bio_endio(req->parent);
		bio_put(bio);
		kfree(req);
		return;
	}

	/*
	 * NOTE: zram_async_read_endio() is not exactly the right place for
	 * this. Ideally, we need to do it after the ZRAM_WB check, but this
	 * requires us to use the wq path even on systems that don't enable
	 * compressed writeback, because we cannot take the slot-lock in the
	 * current context.
	 *
	 * Keep the existing behavior for now.
	 */
	if (zram->wb_compressed == false) {
		/* No decompression needed, complete the parent IO */
		bio_endio(req->parent);
		bio_put(bio);
		kfree(req);
		return;
	}

	/*
	 * zram decompression is sleepable, so we need to defer it to
	 * a preemptible context.
	 */
	INIT_WORK(&req->work, zram_deferred_decompress);
	queue_work(system_highpri_wq, &req->work);
}

static void read_from_bdev_async(struct zram *zram, struct page *page,
				 u32 index, unsigned long blk_idx,
				 struct bio *parent)
{
	struct zram_rb_req *req;
	struct bio *bio;

	req = kmalloc(sizeof(*req), GFP_NOIO);
	if (!req)
		return;

	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
	if (!bio) {
		kfree(req);
		return;
	}

	req->zram = zram;
	req->index = index;
	req->blk_idx = blk_idx;
	req->bio = bio;
	req->parent = parent;

	bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
	bio->bi_private = req;
	bio->bi_end_io = zram_async_read_endio;

	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_inc_remaining(parent);
	submit_bio(bio);
}

static void zram_sync_read(struct work_struct *w)
{
	struct zram_rb_req *req = container_of(w, struct zram_rb_req, work);
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
	__bio_add_page(&bio, req->page, PAGE_SIZE, 0);
	req->error = submit_bio_wait(&bio);
}

/*
 * The block layer wants one ->submit_bio to be active at a time, so chaining
 * this IO to the parent IO in the same context would deadlock. To avoid that,
 * use a worker thread context.
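 * The worker (zram_sync_read()) simply does a synchronous submit_bio_wait()
 * on an on-stack bio and stores the result in req->error.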
1496 */ 1497 static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index, 1498 unsigned long blk_idx) 1499 { 1500 struct zram_rb_req req; 1501 1502 req.page = page; 1503 req.zram = zram; 1504 req.blk_idx = blk_idx; 1505 1506 INIT_WORK_ONSTACK(&req.work, zram_sync_read); 1507 queue_work(system_dfl_wq, &req.work); 1508 flush_work(&req.work); 1509 destroy_work_on_stack(&req.work); 1510 1511 if (req.error || zram->wb_compressed == false) 1512 return req.error; 1513 1514 return decompress_bdev_page(zram, page, index); 1515 } 1516 1517 static int read_from_bdev(struct zram *zram, struct page *page, u32 index, 1518 unsigned long blk_idx, struct bio *parent) 1519 { 1520 atomic64_inc(&zram->stats.bd_reads); 1521 if (!parent) { 1522 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) 1523 return -EIO; 1524 return read_from_bdev_sync(zram, page, index, blk_idx); 1525 } 1526 read_from_bdev_async(zram, page, index, blk_idx, parent); 1527 return 0; 1528 } 1529 #else 1530 static inline void reset_bdev(struct zram *zram) {}; 1531 static int read_from_bdev(struct zram *zram, struct page *page, u32 index, 1532 unsigned long blk_idx, struct bio *parent) 1533 { 1534 return -EIO; 1535 } 1536 1537 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) 1538 { 1539 } 1540 #endif 1541 1542 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 1543 1544 static struct dentry *zram_debugfs_root; 1545 1546 static void zram_debugfs_create(void) 1547 { 1548 zram_debugfs_root = debugfs_create_dir("zram", NULL); 1549 } 1550 1551 static void zram_debugfs_destroy(void) 1552 { 1553 debugfs_remove_recursive(zram_debugfs_root); 1554 } 1555 1556 static ssize_t read_block_state(struct file *file, char __user *buf, 1557 size_t count, loff_t *ppos) 1558 { 1559 char *kbuf; 1560 ssize_t index, written = 0; 1561 struct zram *zram = file->private_data; 1562 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 1563 1564 kbuf = kvmalloc(count, GFP_KERNEL); 1565 if (!kbuf) 1566 return -ENOMEM; 1567 1568 guard(rwsem_read)(&zram->dev_lock); 1569 if (!init_done(zram)) { 1570 kvfree(kbuf); 1571 return -EINVAL; 1572 } 1573 1574 for (index = *ppos; index < nr_pages; index++) { 1575 int copied; 1576 1577 slot_lock(zram, index); 1578 if (!slot_allocated(zram, index)) 1579 goto next; 1580 1581 copied = snprintf(kbuf + written, count, 1582 "%12zd %12u.%06d %c%c%c%c%c%c\n", 1583 index, zram->table[index].attr.ac_time, 0, 1584 test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.', 1585 test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.', 1586 test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', 1587 test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', 1588 get_slot_comp_priority(zram, index) ? 'r' : '.', 1589 test_slot_flag(zram, index, 1590 ZRAM_INCOMPRESSIBLE) ? 
'n' : '.'); 1591 1592 if (count <= copied) { 1593 slot_unlock(zram, index); 1594 break; 1595 } 1596 written += copied; 1597 count -= copied; 1598 next: 1599 slot_unlock(zram, index); 1600 *ppos += 1; 1601 } 1602 1603 if (copy_to_user(buf, kbuf, written)) 1604 written = -EFAULT; 1605 kvfree(kbuf); 1606 1607 return written; 1608 } 1609 1610 static const struct file_operations proc_zram_block_state_op = { 1611 .open = simple_open, 1612 .read = read_block_state, 1613 .llseek = default_llseek, 1614 }; 1615 1616 static void zram_debugfs_register(struct zram *zram) 1617 { 1618 if (!zram_debugfs_root) 1619 return; 1620 1621 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, 1622 zram_debugfs_root); 1623 debugfs_create_file("block_state", 0400, zram->debugfs_dir, 1624 zram, &proc_zram_block_state_op); 1625 } 1626 1627 static void zram_debugfs_unregister(struct zram *zram) 1628 { 1629 debugfs_remove_recursive(zram->debugfs_dir); 1630 } 1631 #else 1632 static void zram_debugfs_create(void) {}; 1633 static void zram_debugfs_destroy(void) {}; 1634 static void zram_debugfs_register(struct zram *zram) {}; 1635 static void zram_debugfs_unregister(struct zram *zram) {}; 1636 #endif 1637 1638 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) 1639 { 1640 /* Do not free statically defined compression algorithms */ 1641 if (zram->comp_algs[prio] != default_compressor) 1642 kfree(zram->comp_algs[prio]); 1643 1644 zram->comp_algs[prio] = alg; 1645 } 1646 1647 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) 1648 { 1649 char *compressor; 1650 size_t sz; 1651 1652 sz = strlen(buf); 1653 if (sz >= ZRAM_MAX_ALGO_NAME_SZ) 1654 return -E2BIG; 1655 1656 compressor = kstrdup(buf, GFP_KERNEL); 1657 if (!compressor) 1658 return -ENOMEM; 1659 1660 /* ignore trailing newline */ 1661 if (sz > 0 && compressor[sz - 1] == '\n') 1662 compressor[sz - 1] = 0x00; 1663 1664 if (!zcomp_available_algorithm(compressor)) { 1665 kfree(compressor); 1666 return -EINVAL; 1667 } 1668 1669 guard(rwsem_write)(&zram->dev_lock); 1670 if (init_done(zram)) { 1671 kfree(compressor); 1672 pr_info("Can't change algorithm for initialized device\n"); 1673 return -EBUSY; 1674 } 1675 1676 comp_algorithm_set(zram, prio, compressor); 1677 return 0; 1678 } 1679 1680 static void comp_params_reset(struct zram *zram, u32 prio) 1681 { 1682 struct zcomp_params *params = &zram->params[prio]; 1683 1684 vfree(params->dict); 1685 params->level = ZCOMP_PARAM_NOT_SET; 1686 params->deflate.winbits = ZCOMP_PARAM_NOT_SET; 1687 params->dict_sz = 0; 1688 params->dict = NULL; 1689 } 1690 1691 static int comp_params_store(struct zram *zram, u32 prio, s32 level, 1692 const char *dict_path, 1693 struct deflate_params *deflate_params) 1694 { 1695 ssize_t sz = 0; 1696 1697 comp_params_reset(zram, prio); 1698 1699 if (dict_path) { 1700 sz = kernel_read_file_from_path(dict_path, 0, 1701 &zram->params[prio].dict, 1702 INT_MAX, 1703 NULL, 1704 READING_POLICY); 1705 if (sz < 0) 1706 return -EINVAL; 1707 } 1708 1709 zram->params[prio].dict_sz = sz; 1710 zram->params[prio].level = level; 1711 zram->params[prio].deflate.winbits = deflate_params->winbits; 1712 return 0; 1713 } 1714 1715 static ssize_t algorithm_params_store(struct device *dev, 1716 struct device_attribute *attr, 1717 const char *buf, 1718 size_t len) 1719 { 1720 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET; 1721 char *args, *param, *val, *algo = NULL, *dict_path = NULL; 1722 struct deflate_params deflate_params; 1723 struct zram *zram = 
dev_to_zram(dev); 1724 int ret; 1725 1726 deflate_params.winbits = ZCOMP_PARAM_NOT_SET; 1727 1728 args = skip_spaces(buf); 1729 while (*args) { 1730 args = next_arg(args, ¶m, &val); 1731 1732 if (!val || !*val) 1733 return -EINVAL; 1734 1735 if (!strcmp(param, "priority")) { 1736 ret = kstrtoint(val, 10, &prio); 1737 if (ret) 1738 return ret; 1739 continue; 1740 } 1741 1742 if (!strcmp(param, "level")) { 1743 ret = kstrtoint(val, 10, &level); 1744 if (ret) 1745 return ret; 1746 continue; 1747 } 1748 1749 if (!strcmp(param, "algo")) { 1750 algo = val; 1751 continue; 1752 } 1753 1754 if (!strcmp(param, "dict")) { 1755 dict_path = val; 1756 continue; 1757 } 1758 1759 if (!strcmp(param, "deflate.winbits")) { 1760 ret = kstrtoint(val, 10, &deflate_params.winbits); 1761 if (ret) 1762 return ret; 1763 continue; 1764 } 1765 } 1766 1767 /* Lookup priority by algorithm name */ 1768 if (algo) { 1769 s32 p; 1770 1771 prio = -EINVAL; 1772 for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) { 1773 if (!zram->comp_algs[p]) 1774 continue; 1775 1776 if (!strcmp(zram->comp_algs[p], algo)) { 1777 prio = p; 1778 break; 1779 } 1780 } 1781 } 1782 1783 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS) 1784 return -EINVAL; 1785 1786 ret = comp_params_store(zram, prio, level, dict_path, &deflate_params); 1787 return ret ? ret : len; 1788 } 1789 1790 static ssize_t comp_algorithm_show(struct device *dev, 1791 struct device_attribute *attr, 1792 char *buf) 1793 { 1794 struct zram *zram = dev_to_zram(dev); 1795 ssize_t sz; 1796 1797 guard(rwsem_read)(&zram->dev_lock); 1798 sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0); 1799 return sz; 1800 } 1801 1802 static ssize_t comp_algorithm_store(struct device *dev, 1803 struct device_attribute *attr, 1804 const char *buf, 1805 size_t len) 1806 { 1807 struct zram *zram = dev_to_zram(dev); 1808 int ret; 1809 1810 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf); 1811 return ret ? ret : len; 1812 } 1813 1814 #ifdef CONFIG_ZRAM_MULTI_COMP 1815 static ssize_t recomp_algorithm_show(struct device *dev, 1816 struct device_attribute *attr, 1817 char *buf) 1818 { 1819 struct zram *zram = dev_to_zram(dev); 1820 ssize_t sz = 0; 1821 u32 prio; 1822 1823 guard(rwsem_read)(&zram->dev_lock); 1824 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 1825 if (!zram->comp_algs[prio]) 1826 continue; 1827 1828 sz += sysfs_emit_at(buf, sz, "#%d: ", prio); 1829 sz += zcomp_available_show(zram->comp_algs[prio], buf, sz); 1830 } 1831 return sz; 1832 } 1833 1834 static ssize_t recomp_algorithm_store(struct device *dev, 1835 struct device_attribute *attr, 1836 const char *buf, 1837 size_t len) 1838 { 1839 struct zram *zram = dev_to_zram(dev); 1840 int prio = ZRAM_SECONDARY_COMP; 1841 char *args, *param, *val; 1842 char *alg = NULL; 1843 int ret; 1844 1845 args = skip_spaces(buf); 1846 while (*args) { 1847 args = next_arg(args, ¶m, &val); 1848 1849 if (!val || !*val) 1850 return -EINVAL; 1851 1852 if (!strcmp(param, "algo")) { 1853 alg = val; 1854 continue; 1855 } 1856 1857 if (!strcmp(param, "priority")) { 1858 ret = kstrtoint(val, 10, &prio); 1859 if (ret) 1860 return ret; 1861 continue; 1862 } 1863 } 1864 1865 if (!alg) 1866 return -EINVAL; 1867 1868 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) 1869 return -EINVAL; 1870 1871 ret = __comp_algorithm_store(zram, prio, alg); 1872 return ret ? 
ret : len; 1873 } 1874 #endif 1875 1876 static ssize_t compact_store(struct device *dev, struct device_attribute *attr, 1877 const char *buf, size_t len) 1878 { 1879 struct zram *zram = dev_to_zram(dev); 1880 1881 guard(rwsem_read)(&zram->dev_lock); 1882 if (!init_done(zram)) 1883 return -EINVAL; 1884 1885 zs_compact(zram->mem_pool); 1886 1887 return len; 1888 } 1889 1890 static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr, 1891 char *buf) 1892 { 1893 struct zram *zram = dev_to_zram(dev); 1894 ssize_t ret; 1895 1896 guard(rwsem_read)(&zram->dev_lock); 1897 ret = sysfs_emit(buf, 1898 "%8llu %8llu 0 %8llu\n", 1899 (u64)atomic64_read(&zram->stats.failed_reads), 1900 (u64)atomic64_read(&zram->stats.failed_writes), 1901 (u64)atomic64_read(&zram->stats.notify_free)); 1902 1903 return ret; 1904 } 1905 1906 static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr, 1907 char *buf) 1908 { 1909 struct zram *zram = dev_to_zram(dev); 1910 struct zs_pool_stats pool_stats; 1911 u64 orig_size, mem_used = 0; 1912 long max_used; 1913 ssize_t ret; 1914 1915 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 1916 1917 guard(rwsem_read)(&zram->dev_lock); 1918 if (init_done(zram)) { 1919 mem_used = zs_get_total_pages(zram->mem_pool); 1920 zs_pool_stats(zram->mem_pool, &pool_stats); 1921 } 1922 1923 orig_size = atomic64_read(&zram->stats.pages_stored); 1924 max_used = atomic_long_read(&zram->stats.max_used_pages); 1925 1926 ret = sysfs_emit(buf, 1927 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", 1928 orig_size << PAGE_SHIFT, 1929 (u64)atomic64_read(&zram->stats.compr_data_size), 1930 mem_used << PAGE_SHIFT, 1931 zram->limit_pages << PAGE_SHIFT, 1932 max_used << PAGE_SHIFT, 1933 (u64)atomic64_read(&zram->stats.same_pages), 1934 atomic_long_read(&pool_stats.pages_compacted), 1935 (u64)atomic64_read(&zram->stats.huge_pages), 1936 (u64)atomic64_read(&zram->stats.huge_pages_since)); 1937 1938 return ret; 1939 } 1940 1941 static ssize_t debug_stat_show(struct device *dev, 1942 struct device_attribute *attr, char *buf) 1943 { 1944 int version = 1; 1945 struct zram *zram = dev_to_zram(dev); 1946 ssize_t ret; 1947 1948 guard(rwsem_read)(&zram->dev_lock); 1949 ret = sysfs_emit(buf, 1950 "version: %d\n0 %8llu\n", 1951 version, 1952 (u64)atomic64_read(&zram->stats.miss_free)); 1953 1954 return ret; 1955 } 1956 1957 static void zram_meta_free(struct zram *zram, u64 disksize) 1958 { 1959 size_t num_pages = disksize >> PAGE_SHIFT; 1960 size_t index; 1961 1962 if (!zram->table) 1963 return; 1964 1965 /* Free all pages that are still in this zram device */ 1966 for (index = 0; index < num_pages; index++) 1967 slot_free(zram, index); 1968 1969 zs_destroy_pool(zram->mem_pool); 1970 vfree(zram->table); 1971 zram->table = NULL; 1972 } 1973 1974 static bool zram_meta_alloc(struct zram *zram, u64 disksize) 1975 { 1976 size_t num_pages, index; 1977 1978 num_pages = disksize >> PAGE_SHIFT; 1979 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); 1980 if (!zram->table) 1981 return false; 1982 1983 zram->mem_pool = zs_create_pool(zram->disk->disk_name); 1984 if (!zram->mem_pool) { 1985 vfree(zram->table); 1986 zram->table = NULL; 1987 return false; 1988 } 1989 1990 if (!huge_class_size) 1991 huge_class_size = zs_huge_class_size(zram->mem_pool); 1992 1993 for (index = 0; index < num_pages; index++) 1994 slot_lock_init(zram, index); 1995 1996 return true; 1997 } 1998 1999 static void slot_free(struct zram *zram, u32 index) 2000 { 2001 unsigned long handle; 2002 
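	/*
	 * Called with the slot lock held on the I/O and post-processing
	 * paths; zram_meta_free() calls it without the lock during device
	 * teardown.
	 */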
2003 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 2004 zram->table[index].attr.ac_time = 0; 2005 #endif 2006 2007 clear_slot_flag(zram, index, ZRAM_IDLE); 2008 clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE); 2009 clear_slot_flag(zram, index, ZRAM_PP_SLOT); 2010 set_slot_comp_priority(zram, index, 0); 2011 2012 if (test_slot_flag(zram, index, ZRAM_HUGE)) { 2013 clear_slot_flag(zram, index, ZRAM_HUGE); 2014 atomic64_dec(&zram->stats.huge_pages); 2015 } 2016 2017 if (test_slot_flag(zram, index, ZRAM_WB)) { 2018 clear_slot_flag(zram, index, ZRAM_WB); 2019 zram_release_bdev_block(zram, get_slot_handle(zram, index)); 2020 goto out; 2021 } 2022 2023 /* 2024 * No memory is allocated for same element filled pages. 2025 * Simply clear same page flag. 2026 */ 2027 if (test_slot_flag(zram, index, ZRAM_SAME)) { 2028 clear_slot_flag(zram, index, ZRAM_SAME); 2029 atomic64_dec(&zram->stats.same_pages); 2030 goto out; 2031 } 2032 2033 handle = get_slot_handle(zram, index); 2034 if (!handle) 2035 return; 2036 2037 zs_free(zram->mem_pool, handle); 2038 2039 atomic64_sub(get_slot_size(zram, index), 2040 &zram->stats.compr_data_size); 2041 out: 2042 atomic64_dec(&zram->stats.pages_stored); 2043 set_slot_handle(zram, index, 0); 2044 set_slot_size(zram, index, 0); 2045 } 2046 2047 static int read_same_filled_page(struct zram *zram, struct page *page, 2048 u32 index) 2049 { 2050 void *mem; 2051 2052 mem = kmap_local_page(page); 2053 zram_fill_page(mem, PAGE_SIZE, get_slot_handle(zram, index)); 2054 kunmap_local(mem); 2055 return 0; 2056 } 2057 2058 static int read_incompressible_page(struct zram *zram, struct page *page, 2059 u32 index) 2060 { 2061 unsigned long handle; 2062 void *src, *dst; 2063 2064 handle = get_slot_handle(zram, index); 2065 src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL); 2066 dst = kmap_local_page(page); 2067 copy_page(dst, src); 2068 kunmap_local(dst); 2069 zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src); 2070 2071 return 0; 2072 } 2073 2074 static int read_compressed_page(struct zram *zram, struct page *page, u32 index) 2075 { 2076 struct zcomp_strm *zstrm; 2077 unsigned long handle; 2078 unsigned int size; 2079 void *src, *dst; 2080 int ret, prio; 2081 2082 handle = get_slot_handle(zram, index); 2083 size = get_slot_size(zram, index); 2084 prio = get_slot_comp_priority(zram, index); 2085 2086 zstrm = zcomp_stream_get(zram->comps[prio]); 2087 src = zs_obj_read_begin(zram->mem_pool, handle, size, 2088 zstrm->local_copy); 2089 dst = kmap_local_page(page); 2090 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst); 2091 kunmap_local(dst); 2092 zs_obj_read_end(zram->mem_pool, handle, size, src); 2093 zcomp_stream_put(zstrm); 2094 2095 return ret; 2096 } 2097 2098 #if defined CONFIG_ZRAM_WRITEBACK 2099 static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index) 2100 { 2101 struct zcomp_strm *zstrm; 2102 unsigned long handle; 2103 unsigned int size; 2104 void *src; 2105 2106 handle = get_slot_handle(zram, index); 2107 size = get_slot_size(zram, index); 2108 2109 /* 2110 * We need to get stream just for ->local_copy buffer, in 2111 * case if object spans two physical pages. No decompression 2112 * takes place here, as we read raw compressed data. 
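	 * The destination page thus receives get_slot_size() bytes of raw
	 * data; the preserved size/priority metadata is what allows the
	 * writeback block to be decompressed later.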
	 */
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	src = zs_obj_read_begin(zram->mem_pool, handle, size,
				zstrm->local_copy);
	memcpy_to_page(page, 0, src, size);
	zs_obj_read_end(zram->mem_pool, handle, size, src);
	zcomp_stream_put(zstrm);

	return 0;
}
#endif

/*
 * Reads (decompresses if needed) a page from zspool (zsmalloc).
 * Corresponding ZRAM slot should be locked.
 */
static int read_from_zspool(struct zram *zram, struct page *page, u32 index)
{
	if (test_slot_flag(zram, index, ZRAM_SAME) ||
	    !get_slot_handle(zram, index))
		return read_same_filled_page(zram, page, index);

	if (!test_slot_flag(zram, index, ZRAM_HUGE))
		return read_compressed_page(zram, page, index);
	else
		return read_incompressible_page(zram, page, index);
}

static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	slot_lock(zram, index);
	if (!test_slot_flag(zram, index, ZRAM_WB)) {
		/* Slot should be locked throughout the function call */
		ret = read_from_zspool(zram, page, index);
		slot_unlock(zram, index);
	} else {
		unsigned long blk_idx = get_slot_handle(zram, index);

		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		slot_unlock(zram, index);
		ret = read_from_bdev(zram, page, index, blk_idx, parent);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

/*
 * Use a temporary buffer to decompress the page, as the decompressor
 * always expects a full page for the output.
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;
	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_read_partial(zram, bvec, index, offset);
	return zram_read_page(zram, bvec->bv_page, index, bio);
}

static int write_same_filled_page(struct zram *zram, unsigned long fill,
				  u32 index)
{
	slot_lock(zram, index);
	slot_free(zram, index);
	set_slot_flag(zram, index, ZRAM_SAME);
	set_slot_handle(zram, index, fill);
	slot_unlock(zram, index);

	atomic64_inc(&zram->stats.same_pages);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

static int write_incompressible_page(struct zram *zram, struct page *page,
				     u32 index)
{
	unsigned long handle;
	void *src;

	/*
	 * This function is called from a preemptible context, so we don't
	 * need the optimistic handle allocation with a pessimistic fallback
	 * that we use for compressible pages.
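	 * zram_write_page() takes this path when the compressed length
	 * reaches huge_class_size; the page is then stored uncompressed
	 * (PAGE_SIZE) and flagged ZRAM_HUGE.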
2221 */ 2222 handle = zs_malloc(zram->mem_pool, PAGE_SIZE, 2223 GFP_NOIO | __GFP_NOWARN | 2224 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); 2225 if (IS_ERR_VALUE(handle)) 2226 return PTR_ERR((void *)handle); 2227 2228 if (!zram_can_store_page(zram)) { 2229 zs_free(zram->mem_pool, handle); 2230 return -ENOMEM; 2231 } 2232 2233 src = kmap_local_page(page); 2234 zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE); 2235 kunmap_local(src); 2236 2237 slot_lock(zram, index); 2238 slot_free(zram, index); 2239 set_slot_flag(zram, index, ZRAM_HUGE); 2240 set_slot_handle(zram, index, handle); 2241 set_slot_size(zram, index, PAGE_SIZE); 2242 slot_unlock(zram, index); 2243 2244 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size); 2245 atomic64_inc(&zram->stats.huge_pages); 2246 atomic64_inc(&zram->stats.huge_pages_since); 2247 atomic64_inc(&zram->stats.pages_stored); 2248 2249 return 0; 2250 } 2251 2252 static int zram_write_page(struct zram *zram, struct page *page, u32 index) 2253 { 2254 int ret = 0; 2255 unsigned long handle; 2256 unsigned int comp_len; 2257 void *mem; 2258 struct zcomp_strm *zstrm; 2259 unsigned long element; 2260 bool same_filled; 2261 2262 mem = kmap_local_page(page); 2263 same_filled = page_same_filled(mem, &element); 2264 kunmap_local(mem); 2265 if (same_filled) 2266 return write_same_filled_page(zram, element, index); 2267 2268 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); 2269 mem = kmap_local_page(page); 2270 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm, 2271 mem, &comp_len); 2272 kunmap_local(mem); 2273 2274 if (unlikely(ret)) { 2275 zcomp_stream_put(zstrm); 2276 pr_err("Compression failed! err=%d\n", ret); 2277 return ret; 2278 } 2279 2280 if (comp_len >= huge_class_size) { 2281 zcomp_stream_put(zstrm); 2282 return write_incompressible_page(zram, page, index); 2283 } 2284 2285 handle = zs_malloc(zram->mem_pool, comp_len, 2286 GFP_NOIO | __GFP_NOWARN | 2287 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); 2288 if (IS_ERR_VALUE(handle)) { 2289 zcomp_stream_put(zstrm); 2290 return PTR_ERR((void *)handle); 2291 } 2292 2293 if (!zram_can_store_page(zram)) { 2294 zcomp_stream_put(zstrm); 2295 zs_free(zram->mem_pool, handle); 2296 return -ENOMEM; 2297 } 2298 2299 zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len); 2300 zcomp_stream_put(zstrm); 2301 2302 slot_lock(zram, index); 2303 slot_free(zram, index); 2304 set_slot_handle(zram, index, handle); 2305 set_slot_size(zram, index, comp_len); 2306 slot_unlock(zram, index); 2307 2308 /* Update stats */ 2309 atomic64_inc(&zram->stats.pages_stored); 2310 atomic64_add(comp_len, &zram->stats.compr_data_size); 2311 2312 return ret; 2313 } 2314 2315 /* 2316 * This is a partial IO. Read the full page before writing the changes. 
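 *
 * For example (illustrative, assuming 4KiB pages): a 512-byte write at
 * byte offset 1024 of a page cannot be stored directly, because zram
 * (re)compresses whole pages. The full page is first decompressed into
 * a bounce page, bytes 1024..1535 are patched from the bio_vec, and the
 * whole page is then compressed and stored again.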
2317 */ 2318 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, 2319 u32 index, int offset, struct bio *bio) 2320 { 2321 struct page *page = alloc_page(GFP_NOIO); 2322 int ret; 2323 2324 if (!page) 2325 return -ENOMEM; 2326 2327 ret = zram_read_page(zram, page, index, bio); 2328 if (!ret) { 2329 memcpy_from_bvec(page_address(page) + offset, bvec); 2330 ret = zram_write_page(zram, page, index); 2331 } 2332 __free_page(page); 2333 return ret; 2334 } 2335 2336 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 2337 u32 index, int offset, struct bio *bio) 2338 { 2339 if (is_partial_io(bvec)) 2340 return zram_bvec_write_partial(zram, bvec, index, offset, bio); 2341 return zram_write_page(zram, bvec->bv_page, index); 2342 } 2343 2344 #ifdef CONFIG_ZRAM_MULTI_COMP 2345 #define RECOMPRESS_IDLE (1 << 0) 2346 #define RECOMPRESS_HUGE (1 << 1) 2347 2348 static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, 2349 struct zram_pp_ctl *ctl) 2350 { 2351 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 2352 unsigned long index; 2353 2354 for (index = 0; index < nr_pages; index++) { 2355 bool ok = true; 2356 2357 slot_lock(zram, index); 2358 if (!slot_allocated(zram, index)) 2359 goto next; 2360 2361 if (mode & RECOMPRESS_IDLE && 2362 !test_slot_flag(zram, index, ZRAM_IDLE)) 2363 goto next; 2364 2365 if (mode & RECOMPRESS_HUGE && 2366 !test_slot_flag(zram, index, ZRAM_HUGE)) 2367 goto next; 2368 2369 if (test_slot_flag(zram, index, ZRAM_WB) || 2370 test_slot_flag(zram, index, ZRAM_SAME) || 2371 test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 2372 goto next; 2373 2374 /* Already compressed with same of higher priority */ 2375 if (get_slot_comp_priority(zram, index) + 1 >= prio_max) 2376 goto next; 2377 2378 ok = place_pp_slot(zram, ctl, index); 2379 next: 2380 slot_unlock(zram, index); 2381 if (!ok) 2382 break; 2383 } 2384 2385 return 0; 2386 } 2387 2388 /* 2389 * This function will decompress (unless it's ZRAM_HUGE) the page and then 2390 * attempt to compress it using provided compression algorithm priority 2391 * (which is potentially more effective). 2392 * 2393 * Corresponding ZRAM slot should be locked. 2394 */ 2395 static int recompress_slot(struct zram *zram, u32 index, struct page *page, 2396 u64 *num_recomp_pages, u32 threshold, u32 prio, 2397 u32 prio_max) 2398 { 2399 struct zcomp_strm *zstrm = NULL; 2400 unsigned long handle_old; 2401 unsigned long handle_new; 2402 unsigned int comp_len_old; 2403 unsigned int comp_len_new; 2404 unsigned int class_index_old; 2405 unsigned int class_index_new; 2406 void *src; 2407 int ret = 0; 2408 2409 handle_old = get_slot_handle(zram, index); 2410 if (!handle_old) 2411 return -EINVAL; 2412 2413 comp_len_old = get_slot_size(zram, index); 2414 /* 2415 * Do not recompress objects that are already "small enough". 2416 */ 2417 if (comp_len_old < threshold) 2418 return 0; 2419 2420 ret = read_from_zspool(zram, page, index); 2421 if (ret) 2422 return ret; 2423 2424 /* 2425 * We touched this entry so mark it as non-IDLE. This makes sure that 2426 * we don't preserve IDLE flag and don't incorrectly pick this entry 2427 * for different post-processing type (e.g. writeback). 
2428 */ 2429 clear_slot_flag(zram, index, ZRAM_IDLE); 2430 2431 class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); 2432 2433 prio = max(prio, get_slot_comp_priority(zram, index) + 1); 2434 /* 2435 * Recompression slots scan should not select slots that are 2436 * already compressed with a higher priority algorithm, but 2437 * just in case 2438 */ 2439 if (prio >= prio_max) 2440 return 0; 2441 2442 /* 2443 * Iterate the secondary comp algorithms list (in order of priority) 2444 * and try to recompress the page. 2445 */ 2446 for (; prio < prio_max; prio++) { 2447 if (!zram->comps[prio]) 2448 continue; 2449 2450 zstrm = zcomp_stream_get(zram->comps[prio]); 2451 src = kmap_local_page(page); 2452 ret = zcomp_compress(zram->comps[prio], zstrm, 2453 src, &comp_len_new); 2454 kunmap_local(src); 2455 2456 if (ret) { 2457 zcomp_stream_put(zstrm); 2458 zstrm = NULL; 2459 break; 2460 } 2461 2462 class_index_new = zs_lookup_class_index(zram->mem_pool, 2463 comp_len_new); 2464 2465 /* Continue until we make progress */ 2466 if (class_index_new >= class_index_old || 2467 (threshold && comp_len_new >= threshold)) { 2468 zcomp_stream_put(zstrm); 2469 zstrm = NULL; 2470 continue; 2471 } 2472 2473 /* Recompression was successful so break out */ 2474 break; 2475 } 2476 2477 /* 2478 * Decrement the limit (if set) on pages we can recompress, even 2479 * when current recompression was unsuccessful or did not compress 2480 * the page below the threshold, because we still spent resources 2481 * on it. 2482 */ 2483 if (*num_recomp_pages) 2484 *num_recomp_pages -= 1; 2485 2486 /* Compression error */ 2487 if (ret) 2488 return ret; 2489 2490 if (!zstrm) { 2491 /* 2492 * Secondary algorithms failed to re-compress the page 2493 * in a way that would save memory. 2494 * 2495 * Mark the object incompressible if the max-priority 2496 * algorithm couldn't re-compress it. 2497 */ 2498 if (prio < zram->num_active_comps) 2499 return 0; 2500 set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE); 2501 return 0; 2502 } 2503 2504 /* 2505 * We are holding per-CPU stream mutex and entry lock so better 2506 * avoid direct reclaim. Allocation error is not fatal since 2507 * we still have the old object in the mem_pool. 2508 * 2509 * XXX: technically, the node we really want here is the node that 2510 * holds the original compressed data. But that would require us to 2511 * modify zsmalloc API to return this information. For now, we will 2512 * make do with the node of the page allocated for recompression. 
2513 	 */
2514 	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
2515 			       GFP_NOIO | __GFP_NOWARN |
2516 			       __GFP_HIGHMEM | __GFP_MOVABLE,
2517 			       page_to_nid(page));
2518 	if (IS_ERR_VALUE(handle_new)) {
2519 		zcomp_stream_put(zstrm);
2520 		return PTR_ERR((void *)handle_new);
2521 	}
2522 
2523 	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
2524 	zcomp_stream_put(zstrm);
2525 
2526 	slot_free(zram, index);
2527 	set_slot_handle(zram, index, handle_new);
2528 	set_slot_size(zram, index, comp_len_new);
2529 	set_slot_comp_priority(zram, index, prio);
2530 
2531 	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
2532 	atomic64_inc(&zram->stats.pages_stored);
2533 
2534 	return 0;
2535 }
2536 
2537 static ssize_t recompress_store(struct device *dev,
2538 				struct device_attribute *attr,
2539 				const char *buf, size_t len)
2540 {
2541 	struct zram *zram = dev_to_zram(dev);
2542 	char *args, *param, *val, *algo = NULL;
2543 	u64 num_recomp_pages = ULLONG_MAX;
2544 	struct zram_pp_ctl *ctl = NULL;
2545 	struct zram_pp_slot *pps;
2546 	u32 mode = 0, threshold = 0;
2547 	u32 prio, prio_max;
2548 	struct page *page = NULL;
2549 	ssize_t ret;
2550 
2551 	prio = ZRAM_SECONDARY_COMP;
2552 	prio_max = zram->num_active_comps;
2553 
2554 	args = skip_spaces(buf);
2555 	while (*args) {
2556 		args = next_arg(args, &param, &val);
2557 
2558 		if (!val || !*val)
2559 			return -EINVAL;
2560 
2561 		if (!strcmp(param, "type")) {
2562 			if (!strcmp(val, "idle"))
2563 				mode = RECOMPRESS_IDLE;
2564 			if (!strcmp(val, "huge"))
2565 				mode = RECOMPRESS_HUGE;
2566 			if (!strcmp(val, "huge_idle"))
2567 				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
2568 			continue;
2569 		}
2570 
2571 		if (!strcmp(param, "max_pages")) {
2572 			/*
2573 			 * Limit the number of entries (pages) we attempt to
2574 			 * recompress.
2575 			 */
2576 			ret = kstrtoull(val, 10, &num_recomp_pages);
2577 			if (ret)
2578 				return ret;
2579 			continue;
2580 		}
2581 
2582 		if (!strcmp(param, "threshold")) {
2583 			/*
2584 			 * We will re-compress only idle objects whose size is
2585 			 * equal to or greater than this watermark.
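			 *
			 * For illustration, a write such as
			 *
			 *   echo "type=huge_idle threshold=3000 max_pages=1024" > \
			 *			/sys/block/zramX/recompress
			 *
			 * asks to recompress at most 1024 idle huge slots whose
			 * current compressed size is at least 3000 bytes, using
			 * the configured secondary algorithm(s); "algo=<name>"
			 * or "priority=<N>" narrows this down to one specific
			 * secondary algorithm.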
2586 			 */
2587 			ret = kstrtouint(val, 10, &threshold);
2588 			if (ret)
2589 				return ret;
2590 			continue;
2591 		}
2592 
2593 		if (!strcmp(param, "algo")) {
2594 			algo = val;
2595 			continue;
2596 		}
2597 
2598 		if (!strcmp(param, "priority")) {
2599 			ret = kstrtouint(val, 10, &prio);
2600 			if (ret)
2601 				return ret;
2602 
2603 			if (prio == ZRAM_PRIMARY_COMP)
2604 				prio = ZRAM_SECONDARY_COMP;
2605 
2606 			prio_max = prio + 1;
2607 			continue;
2608 		}
2609 	}
2610 
2611 	if (threshold >= huge_class_size)
2612 		return -EINVAL;
2613 
2614 	guard(rwsem_write)(&zram->dev_lock);
2615 	if (!init_done(zram))
2616 		return -EINVAL;
2617 
2618 	if (algo) {
2619 		bool found = false;
2620 
2621 		for (; prio < ZRAM_MAX_COMPS; prio++) {
2622 			if (!zram->comp_algs[prio])
2623 				continue;
2624 
2625 			if (!strcmp(zram->comp_algs[prio], algo)) {
2626 				prio_max = prio + 1;
2627 				found = true;
2628 				break;
2629 			}
2630 		}
2631 
2632 		if (!found) {
2633 			ret = -EINVAL;
2634 			goto out;
2635 		}
2636 	}
2637 
2638 	prio_max = min(prio_max, (u32)zram->num_active_comps);
2639 	if (prio >= prio_max) {
2640 		ret = -EINVAL;
2641 		goto out;
2642 	}
2643 
2644 	page = alloc_page(GFP_KERNEL);
2645 	if (!page) {
2646 		ret = -ENOMEM;
2647 		goto out;
2648 	}
2649 
2650 	ctl = init_pp_ctl();
2651 	if (!ctl) {
2652 		ret = -ENOMEM;
2653 		goto out;
2654 	}
2655 
2656 	scan_slots_for_recompress(zram, mode, prio_max, ctl);
2657 
2658 	ret = len;
2659 	while ((pps = select_pp_slot(ctl))) {
2660 		int err = 0;
2661 
2662 		if (!num_recomp_pages)
2663 			break;
2664 
2665 		slot_lock(zram, pps->index);
2666 		if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT))
2667 			goto next;
2668 
2669 		err = recompress_slot(zram, pps->index, page,
2670 				      &num_recomp_pages, threshold,
2671 				      prio, prio_max);
2672 next:
2673 		slot_unlock(zram, pps->index);
2674 		release_pp_slot(zram, pps);
2675 
2676 		if (err) {
2677 			ret = err;
2678 			break;
2679 		}
2680 
2681 		cond_resched();
2682 	}
2683 
2684 out:
2685 	if (page)
2686 		__free_page(page);
2687 	release_pp_ctl(zram, ctl);
2688 	return ret;
2689 }
2690 #endif
2691 
2692 static void zram_bio_discard(struct zram *zram, struct bio *bio)
2693 {
2694 	size_t n = bio->bi_iter.bi_size;
2695 	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2696 	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2697 			SECTOR_SHIFT;
2698 
2699 	/*
2700 	 * zram manages data in physical block size units. Because the logical
2701 	 * block size isn't identical to the physical block size on some
2702 	 * architectures, we could get a discard request pointing to a specific
2703 	 * offset within a certain physical block. Although we could handle
2704 	 * this request by reading that physical block, decompressing it,
2705 	 * partially zeroing it, and then re-compressing and re-storing it,
2706 	 * this isn't reasonable because our intent with a discard request is
2707 	 * to save memory. So skipping this logical block is appropriate here.
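	 *
	 * For example (illustrative, assuming 4KiB pages and 512-byte
	 * sectors): a 12KiB discard starting at sector 9 begins 512 bytes
	 * into a page, so the leading 3584 bytes are skipped, the next two
	 * full pages are freed, and the trailing 512 bytes are skipped as
	 * well.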
2708 */ 2709 if (offset) { 2710 if (n <= (PAGE_SIZE - offset)) 2711 return; 2712 2713 n -= (PAGE_SIZE - offset); 2714 index++; 2715 } 2716 2717 while (n >= PAGE_SIZE) { 2718 slot_lock(zram, index); 2719 slot_free(zram, index); 2720 slot_unlock(zram, index); 2721 atomic64_inc(&zram->stats.notify_free); 2722 index++; 2723 n -= PAGE_SIZE; 2724 } 2725 2726 bio_endio(bio); 2727 } 2728 2729 static void zram_bio_read(struct zram *zram, struct bio *bio) 2730 { 2731 unsigned long start_time = bio_start_io_acct(bio); 2732 struct bvec_iter iter = bio->bi_iter; 2733 2734 do { 2735 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2736 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2737 SECTOR_SHIFT; 2738 struct bio_vec bv = bio_iter_iovec(bio, iter); 2739 2740 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2741 2742 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { 2743 atomic64_inc(&zram->stats.failed_reads); 2744 bio->bi_status = BLK_STS_IOERR; 2745 break; 2746 } 2747 flush_dcache_page(bv.bv_page); 2748 2749 slot_lock(zram, index); 2750 mark_slot_accessed(zram, index); 2751 slot_unlock(zram, index); 2752 2753 bio_advance_iter_single(bio, &iter, bv.bv_len); 2754 } while (iter.bi_size); 2755 2756 bio_end_io_acct(bio, start_time); 2757 bio_endio(bio); 2758 } 2759 2760 static void zram_bio_write(struct zram *zram, struct bio *bio) 2761 { 2762 unsigned long start_time = bio_start_io_acct(bio); 2763 struct bvec_iter iter = bio->bi_iter; 2764 2765 do { 2766 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2767 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2768 SECTOR_SHIFT; 2769 struct bio_vec bv = bio_iter_iovec(bio, iter); 2770 2771 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2772 2773 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { 2774 atomic64_inc(&zram->stats.failed_writes); 2775 bio->bi_status = BLK_STS_IOERR; 2776 break; 2777 } 2778 2779 slot_lock(zram, index); 2780 mark_slot_accessed(zram, index); 2781 slot_unlock(zram, index); 2782 2783 bio_advance_iter_single(bio, &iter, bv.bv_len); 2784 } while (iter.bi_size); 2785 2786 bio_end_io_acct(bio, start_time); 2787 bio_endio(bio); 2788 } 2789 2790 /* 2791 * Handler function for all zram I/O requests. 
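 *
 * Read and write bios are walked one page-sized (or smaller) segment at
 * a time: for each segment the page index and the byte offset within
 * that page are derived from the starting sector, and the segment
 * length is clamped so it never crosses a page boundary. For example
 * (illustrative, assuming 4KiB pages and 512-byte sectors), a 16KiB
 * read starting at sector 9 is serviced as five segments: a 3584-byte
 * tail of one page, three full pages, and a 512-byte head of the next
 * page.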
2792 */ 2793 static void zram_submit_bio(struct bio *bio) 2794 { 2795 struct zram *zram = bio->bi_bdev->bd_disk->private_data; 2796 2797 switch (bio_op(bio)) { 2798 case REQ_OP_READ: 2799 zram_bio_read(zram, bio); 2800 break; 2801 case REQ_OP_WRITE: 2802 zram_bio_write(zram, bio); 2803 break; 2804 case REQ_OP_DISCARD: 2805 case REQ_OP_WRITE_ZEROES: 2806 zram_bio_discard(zram, bio); 2807 break; 2808 default: 2809 WARN_ON_ONCE(1); 2810 bio_endio(bio); 2811 } 2812 } 2813 2814 static void zram_slot_free_notify(struct block_device *bdev, 2815 unsigned long index) 2816 { 2817 struct zram *zram; 2818 2819 zram = bdev->bd_disk->private_data; 2820 2821 atomic64_inc(&zram->stats.notify_free); 2822 if (!slot_trylock(zram, index)) { 2823 atomic64_inc(&zram->stats.miss_free); 2824 return; 2825 } 2826 2827 slot_free(zram, index); 2828 slot_unlock(zram, index); 2829 } 2830 2831 static void zram_comp_params_reset(struct zram *zram) 2832 { 2833 u32 prio; 2834 2835 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2836 comp_params_reset(zram, prio); 2837 } 2838 } 2839 2840 static void zram_destroy_comps(struct zram *zram) 2841 { 2842 u32 prio; 2843 2844 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2845 struct zcomp *comp = zram->comps[prio]; 2846 2847 zram->comps[prio] = NULL; 2848 if (!comp) 2849 continue; 2850 zcomp_destroy(comp); 2851 zram->num_active_comps--; 2852 } 2853 2854 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2855 /* Do not free statically defined compression algorithms */ 2856 if (zram->comp_algs[prio] != default_compressor) 2857 kfree(zram->comp_algs[prio]); 2858 zram->comp_algs[prio] = NULL; 2859 } 2860 2861 zram_comp_params_reset(zram); 2862 } 2863 2864 static void zram_reset_device(struct zram *zram) 2865 { 2866 guard(rwsem_write)(&zram->dev_lock); 2867 2868 zram->limit_pages = 0; 2869 2870 set_capacity_and_notify(zram->disk, 0); 2871 part_stat_set_all(zram->disk->part0, 0); 2872 2873 /* I/O operation under all of CPU are done so let's free */ 2874 zram_meta_free(zram, zram->disksize); 2875 zram->disksize = 0; 2876 zram_destroy_comps(zram); 2877 memset(&zram->stats, 0, sizeof(zram->stats)); 2878 reset_bdev(zram); 2879 2880 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 2881 } 2882 2883 static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, 2884 const char *buf, size_t len) 2885 { 2886 u64 disksize; 2887 struct zcomp *comp; 2888 struct zram *zram = dev_to_zram(dev); 2889 int err; 2890 u32 prio; 2891 2892 disksize = memparse(buf, NULL); 2893 if (!disksize) 2894 return -EINVAL; 2895 2896 guard(rwsem_write)(&zram->dev_lock); 2897 if (init_done(zram)) { 2898 pr_info("Cannot change disksize for initialized device\n"); 2899 return -EBUSY; 2900 } 2901 2902 disksize = PAGE_ALIGN(disksize); 2903 if (!zram_meta_alloc(zram, disksize)) 2904 return -ENOMEM; 2905 2906 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2907 if (!zram->comp_algs[prio]) 2908 continue; 2909 2910 comp = zcomp_create(zram->comp_algs[prio], 2911 &zram->params[prio]); 2912 if (IS_ERR(comp)) { 2913 pr_err("Cannot initialise %s compressing backend\n", 2914 zram->comp_algs[prio]); 2915 err = PTR_ERR(comp); 2916 goto out_free_comps; 2917 } 2918 2919 zram->comps[prio] = comp; 2920 zram->num_active_comps++; 2921 } 2922 zram->disksize = disksize; 2923 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); 2924 2925 return len; 2926 2927 out_free_comps: 2928 zram_destroy_comps(zram); 2929 zram_meta_free(zram, 
disksize); 2930 return err; 2931 } 2932 2933 static ssize_t reset_store(struct device *dev, 2934 struct device_attribute *attr, const char *buf, size_t len) 2935 { 2936 int ret; 2937 unsigned short do_reset; 2938 struct zram *zram; 2939 struct gendisk *disk; 2940 2941 ret = kstrtou16(buf, 10, &do_reset); 2942 if (ret) 2943 return ret; 2944 2945 if (!do_reset) 2946 return -EINVAL; 2947 2948 zram = dev_to_zram(dev); 2949 disk = zram->disk; 2950 2951 mutex_lock(&disk->open_mutex); 2952 /* Do not reset an active device or claimed device */ 2953 if (disk_openers(disk) || zram->claim) { 2954 mutex_unlock(&disk->open_mutex); 2955 return -EBUSY; 2956 } 2957 2958 /* From now on, anyone can't open /dev/zram[0-9] */ 2959 zram->claim = true; 2960 mutex_unlock(&disk->open_mutex); 2961 2962 /* Make sure all the pending I/O are finished */ 2963 sync_blockdev(disk->part0); 2964 zram_reset_device(zram); 2965 2966 mutex_lock(&disk->open_mutex); 2967 zram->claim = false; 2968 mutex_unlock(&disk->open_mutex); 2969 2970 return len; 2971 } 2972 2973 static int zram_open(struct gendisk *disk, blk_mode_t mode) 2974 { 2975 struct zram *zram = disk->private_data; 2976 2977 WARN_ON(!mutex_is_locked(&disk->open_mutex)); 2978 2979 /* zram was claimed to reset so open request fails */ 2980 if (zram->claim) 2981 return -EBUSY; 2982 return 0; 2983 } 2984 2985 static const struct block_device_operations zram_devops = { 2986 .open = zram_open, 2987 .submit_bio = zram_submit_bio, 2988 .swap_slot_free_notify = zram_slot_free_notify, 2989 .owner = THIS_MODULE 2990 }; 2991 2992 static DEVICE_ATTR_RO(io_stat); 2993 static DEVICE_ATTR_RO(mm_stat); 2994 static DEVICE_ATTR_RO(debug_stat); 2995 static DEVICE_ATTR_WO(compact); 2996 static DEVICE_ATTR_RW(disksize); 2997 static DEVICE_ATTR_RO(initstate); 2998 static DEVICE_ATTR_WO(reset); 2999 static DEVICE_ATTR_WO(mem_limit); 3000 static DEVICE_ATTR_WO(mem_used_max); 3001 static DEVICE_ATTR_WO(idle); 3002 static DEVICE_ATTR_RW(comp_algorithm); 3003 #ifdef CONFIG_ZRAM_WRITEBACK 3004 static DEVICE_ATTR_RO(bd_stat); 3005 static DEVICE_ATTR_RW(backing_dev); 3006 static DEVICE_ATTR_WO(writeback); 3007 static DEVICE_ATTR_RW(writeback_limit); 3008 static DEVICE_ATTR_RW(writeback_limit_enable); 3009 static DEVICE_ATTR_RW(writeback_batch_size); 3010 static DEVICE_ATTR_RW(writeback_compressed); 3011 #endif 3012 #ifdef CONFIG_ZRAM_MULTI_COMP 3013 static DEVICE_ATTR_RW(recomp_algorithm); 3014 static DEVICE_ATTR_WO(recompress); 3015 #endif 3016 static DEVICE_ATTR_WO(algorithm_params); 3017 3018 static struct attribute *zram_disk_attrs[] = { 3019 &dev_attr_disksize.attr, 3020 &dev_attr_initstate.attr, 3021 &dev_attr_reset.attr, 3022 &dev_attr_compact.attr, 3023 &dev_attr_mem_limit.attr, 3024 &dev_attr_mem_used_max.attr, 3025 &dev_attr_idle.attr, 3026 &dev_attr_comp_algorithm.attr, 3027 #ifdef CONFIG_ZRAM_WRITEBACK 3028 &dev_attr_bd_stat.attr, 3029 &dev_attr_backing_dev.attr, 3030 &dev_attr_writeback.attr, 3031 &dev_attr_writeback_limit.attr, 3032 &dev_attr_writeback_limit_enable.attr, 3033 &dev_attr_writeback_batch_size.attr, 3034 &dev_attr_writeback_compressed.attr, 3035 #endif 3036 &dev_attr_io_stat.attr, 3037 &dev_attr_mm_stat.attr, 3038 &dev_attr_debug_stat.attr, 3039 #ifdef CONFIG_ZRAM_MULTI_COMP 3040 &dev_attr_recomp_algorithm.attr, 3041 &dev_attr_recompress.attr, 3042 #endif 3043 &dev_attr_algorithm_params.attr, 3044 NULL, 3045 }; 3046 3047 ATTRIBUTE_GROUPS(zram_disk); 3048 3049 /* 3050 * Allocate and initialize new zram device. 
the function returns 3051 * '>= 0' device_id upon success, and negative value otherwise. 3052 */ 3053 static int zram_add(void) 3054 { 3055 struct queue_limits lim = { 3056 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, 3057 /* 3058 * To ensure that we always get PAGE_SIZE aligned and 3059 * n*PAGE_SIZED sized I/O requests. 3060 */ 3061 .physical_block_size = PAGE_SIZE, 3062 .io_min = PAGE_SIZE, 3063 .io_opt = PAGE_SIZE, 3064 .max_hw_discard_sectors = UINT_MAX, 3065 /* 3066 * zram_bio_discard() will clear all logical blocks if logical 3067 * block size is identical with physical block size(PAGE_SIZE). 3068 * But if it is different, we will skip discarding some parts of 3069 * logical blocks in the part of the request range which isn't 3070 * aligned to physical block size. So we can't ensure that all 3071 * discarded logical blocks are zeroed. 3072 */ 3073 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE 3074 .max_write_zeroes_sectors = UINT_MAX, 3075 #endif 3076 .features = BLK_FEAT_STABLE_WRITES | 3077 BLK_FEAT_SYNCHRONOUS, 3078 }; 3079 struct zram *zram; 3080 int ret, device_id; 3081 3082 zram = kzalloc(sizeof(struct zram), GFP_KERNEL); 3083 if (!zram) 3084 return -ENOMEM; 3085 3086 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 3087 if (ret < 0) 3088 goto out_free_dev; 3089 device_id = ret; 3090 3091 init_rwsem(&zram->dev_lock); 3092 #ifdef CONFIG_ZRAM_WRITEBACK 3093 zram->wb_batch_size = 32; 3094 zram->wb_compressed = false; 3095 #endif 3096 3097 /* gendisk structure */ 3098 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); 3099 if (IS_ERR(zram->disk)) { 3100 pr_err("Error allocating disk structure for device %d\n", 3101 device_id); 3102 ret = PTR_ERR(zram->disk); 3103 goto out_free_idr; 3104 } 3105 3106 zram->disk->major = zram_major; 3107 zram->disk->first_minor = device_id; 3108 zram->disk->minors = 1; 3109 zram->disk->flags |= GENHD_FL_NO_PART; 3110 zram->disk->fops = &zram_devops; 3111 zram->disk->private_data = zram; 3112 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 3113 zram_comp_params_reset(zram); 3114 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 3115 3116 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */ 3117 set_capacity(zram->disk, 0); 3118 ret = device_add_disk(NULL, zram->disk, zram_disk_groups); 3119 if (ret) 3120 goto out_cleanup_disk; 3121 3122 zram_debugfs_register(zram); 3123 pr_info("Added device: %s\n", zram->disk->disk_name); 3124 return device_id; 3125 3126 out_cleanup_disk: 3127 put_disk(zram->disk); 3128 out_free_idr: 3129 idr_remove(&zram_index_idr, device_id); 3130 out_free_dev: 3131 kfree(zram); 3132 return ret; 3133 } 3134 3135 static int zram_remove(struct zram *zram) 3136 { 3137 bool claimed; 3138 3139 mutex_lock(&zram->disk->open_mutex); 3140 if (disk_openers(zram->disk)) { 3141 mutex_unlock(&zram->disk->open_mutex); 3142 return -EBUSY; 3143 } 3144 3145 claimed = zram->claim; 3146 if (!claimed) 3147 zram->claim = true; 3148 mutex_unlock(&zram->disk->open_mutex); 3149 3150 zram_debugfs_unregister(zram); 3151 3152 if (claimed) { 3153 /* 3154 * If we were claimed by reset_store(), del_gendisk() will 3155 * wait until reset_store() is done, so nothing need to do. 
3156 */ 3157 ; 3158 } else { 3159 /* Make sure all the pending I/O are finished */ 3160 sync_blockdev(zram->disk->part0); 3161 zram_reset_device(zram); 3162 } 3163 3164 pr_info("Removed device: %s\n", zram->disk->disk_name); 3165 3166 del_gendisk(zram->disk); 3167 3168 /* del_gendisk drains pending reset_store */ 3169 WARN_ON_ONCE(claimed && zram->claim); 3170 3171 /* 3172 * disksize_store() may be called in between zram_reset_device() 3173 * and del_gendisk(), so run the last reset to avoid leaking 3174 * anything allocated with disksize_store() 3175 */ 3176 zram_reset_device(zram); 3177 3178 put_disk(zram->disk); 3179 kfree(zram); 3180 return 0; 3181 } 3182 3183 /* zram-control sysfs attributes */ 3184 3185 /* 3186 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a 3187 * sense that reading from this file does alter the state of your system -- it 3188 * creates a new un-initialized zram device and returns back this device's 3189 * device_id (or an error code if it fails to create a new device). 3190 */ 3191 static ssize_t hot_add_show(const struct class *class, 3192 const struct class_attribute *attr, 3193 char *buf) 3194 { 3195 int ret; 3196 3197 mutex_lock(&zram_index_mutex); 3198 ret = zram_add(); 3199 mutex_unlock(&zram_index_mutex); 3200 3201 if (ret < 0) 3202 return ret; 3203 return sysfs_emit(buf, "%d\n", ret); 3204 } 3205 /* This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used */ 3206 static struct class_attribute class_attr_hot_add = 3207 __ATTR(hot_add, 0400, hot_add_show, NULL); 3208 3209 static ssize_t hot_remove_store(const struct class *class, 3210 const struct class_attribute *attr, 3211 const char *buf, 3212 size_t count) 3213 { 3214 struct zram *zram; 3215 int ret, dev_id; 3216 3217 /* dev_id is gendisk->first_minor, which is `int' */ 3218 ret = kstrtoint(buf, 10, &dev_id); 3219 if (ret) 3220 return ret; 3221 if (dev_id < 0) 3222 return -EINVAL; 3223 3224 mutex_lock(&zram_index_mutex); 3225 3226 zram = idr_find(&zram_index_idr, dev_id); 3227 if (zram) { 3228 ret = zram_remove(zram); 3229 if (!ret) 3230 idr_remove(&zram_index_idr, dev_id); 3231 } else { 3232 ret = -ENODEV; 3233 } 3234 3235 mutex_unlock(&zram_index_mutex); 3236 return ret ? 
ret : count; 3237 } 3238 static CLASS_ATTR_WO(hot_remove); 3239 3240 static struct attribute *zram_control_class_attrs[] = { 3241 &class_attr_hot_add.attr, 3242 &class_attr_hot_remove.attr, 3243 NULL, 3244 }; 3245 ATTRIBUTE_GROUPS(zram_control_class); 3246 3247 static struct class zram_control_class = { 3248 .name = "zram-control", 3249 .class_groups = zram_control_class_groups, 3250 }; 3251 3252 static int zram_remove_cb(int id, void *ptr, void *data) 3253 { 3254 WARN_ON_ONCE(zram_remove(ptr)); 3255 return 0; 3256 } 3257 3258 static void destroy_devices(void) 3259 { 3260 class_unregister(&zram_control_class); 3261 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 3262 zram_debugfs_destroy(); 3263 idr_destroy(&zram_index_idr); 3264 unregister_blkdev(zram_major, "zram"); 3265 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3266 } 3267 3268 static int __init zram_init(void) 3269 { 3270 struct zram_table_entry zram_te; 3271 int ret; 3272 3273 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8); 3274 3275 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 3276 zcomp_cpu_up_prepare, zcomp_cpu_dead); 3277 if (ret < 0) 3278 return ret; 3279 3280 ret = class_register(&zram_control_class); 3281 if (ret) { 3282 pr_err("Unable to register zram-control class\n"); 3283 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3284 return ret; 3285 } 3286 3287 zram_debugfs_create(); 3288 zram_major = register_blkdev(0, "zram"); 3289 if (zram_major <= 0) { 3290 pr_err("Unable to get major number\n"); 3291 class_unregister(&zram_control_class); 3292 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3293 return -EBUSY; 3294 } 3295 3296 while (num_devices != 0) { 3297 mutex_lock(&zram_index_mutex); 3298 ret = zram_add(); 3299 mutex_unlock(&zram_index_mutex); 3300 if (ret < 0) 3301 goto out_error; 3302 num_devices--; 3303 } 3304 3305 return 0; 3306 3307 out_error: 3308 destroy_devices(); 3309 return ret; 3310 } 3311 3312 static void __exit zram_exit(void) 3313 { 3314 destroy_devices(); 3315 } 3316 3317 module_init(zram_init); 3318 module_exit(zram_exit); 3319 3320 module_param(num_devices, uint, 0); 3321 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 3322 3323 MODULE_LICENSE("Dual BSD/GPL"); 3324 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 3325 MODULE_DESCRIPTION("Compressed RAM Block Device"); 3326
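
/*
 * Illustrative user-space sketch (an assumption-laden example, not part
 * of the driver): it adds a device through the zram-control class and
 * then sizes it via the disksize attribute declared above. The sysfs
 * paths follow the usual layout, the program must run as root (hot_add
 * is 0400), and the "1G" string relies on disksize_store() accepting
 * memparse()-style suffixes; adjust all of these to taste. Kept under
 * "#if 0" so it is never built with the module.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	char buf[64], path[64];
	ssize_t n;
	int fd, id;

	/* Reading hot_add creates a new device and returns its id. */
	fd = open("/sys/class/zram-control/hot_add", O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return 1;
	buf[n] = '\0';
	id = atoi(buf);

	/* Size the device; "1G" works because disksize uses memparse(). */
	snprintf(path, sizeof(path), "/sys/block/zram%d/disksize", id);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, "1G", 2) != 2) {
		close(fd);
		return 1;
	}
	close(fd);

	printf("zram%d ready\n", id);
	return 0;
}
#endif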