1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define pr_fmt(fmt) "zram: " fmt 16 17 #include <linux/module.h> 18 #include <linux/kernel.h> 19 #include <linux/bio.h> 20 #include <linux/bitops.h> 21 #include <linux/blkdev.h> 22 #include <linux/buffer_head.h> 23 #include <linux/device.h> 24 #include <linux/highmem.h> 25 #include <linux/slab.h> 26 #include <linux/backing-dev.h> 27 #include <linux/string.h> 28 #include <linux/vmalloc.h> 29 #include <linux/err.h> 30 #include <linux/idr.h> 31 #include <linux/sysfs.h> 32 #include <linux/debugfs.h> 33 #include <linux/cpuhotplug.h> 34 #include <linux/part_stat.h> 35 #include <linux/kernel_read_file.h> 36 #include <linux/rcupdate.h> 37 38 #include "zram_drv.h" 39 40 static DEFINE_IDR(zram_index_idr); 41 /* idr index must be protected */ 42 static DEFINE_MUTEX(zram_index_mutex); 43 44 static int zram_major; 45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; 46 47 #define ZRAM_MAX_ALGO_NAME_SZ 128 48 49 /* Module params (documentation at end) */ 50 static unsigned int num_devices = 1; 51 /* 52 * Pages that compress to sizes equals or greater than this are stored 53 * uncompressed in memory. 54 */ 55 static size_t huge_class_size; 56 57 static const struct block_device_operations zram_devops; 58 59 static void slot_free(struct zram *zram, u32 index); 60 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map) 61 62 static void slot_lock_init(struct zram *zram, u32 index) 63 { 64 static struct lock_class_key __key; 65 66 lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock", 67 &__key, 0); 68 } 69 70 /* 71 * entry locking rules: 72 * 73 * 1) Lock is exclusive 74 * 75 * 2) lock() function can sleep waiting for the lock 76 * 77 * 3) Lock owner can sleep 78 * 79 * 4) Use TRY lock variant when in atomic context 80 * - must check return value and handle locking failers 81 */ 82 static __must_check bool slot_trylock(struct zram *zram, u32 index) 83 { 84 unsigned long *lock = &zram->table[index].__lock; 85 86 if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) { 87 mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_); 88 lock_acquired(slot_dep_map(zram, index), _RET_IP_); 89 return true; 90 } 91 92 return false; 93 } 94 95 static void slot_lock(struct zram *zram, u32 index) 96 { 97 unsigned long *lock = &zram->table[index].__lock; 98 99 mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_); 100 wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE); 101 lock_acquired(slot_dep_map(zram, index), _RET_IP_); 102 } 103 104 static void slot_unlock(struct zram *zram, u32 index) 105 { 106 unsigned long *lock = &zram->table[index].__lock; 107 108 mutex_release(slot_dep_map(zram, index), _RET_IP_); 109 clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock); 110 } 111 112 static inline bool init_done(struct zram *zram) 113 { 114 return zram->disksize; 115 } 116 117 static inline struct zram *dev_to_zram(struct device *dev) 118 { 119 return (struct zram *)dev_to_disk(dev)->private_data; 120 } 121 122 static unsigned long get_slot_handle(struct zram *zram, u32 index) 123 { 124 return zram->table[index].handle; 125 } 126 127 static void set_slot_handle(struct zram *zram, u32 index, unsigned long handle) 128 { 129 zram->table[index].handle = handle; 130 } 131 132 static bool test_slot_flag(struct zram *zram, u32 index, 133 enum zram_pageflags flag) 134 { 135 return zram->table[index].attr.flags & BIT(flag); 136 } 137 138 static void set_slot_flag(struct zram *zram, u32 index, 139 enum zram_pageflags flag) 140 { 141 zram->table[index].attr.flags |= BIT(flag); 142 } 143 144 static void clear_slot_flag(struct zram *zram, u32 index, 145 enum zram_pageflags flag) 146 { 147 zram->table[index].attr.flags &= ~BIT(flag); 148 } 149 150 static size_t get_slot_size(struct zram *zram, u32 index) 151 { 152 return zram->table[index].attr.flags & (BIT(ZRAM_FLAG_SHIFT) - 1); 153 } 154 155 static void set_slot_size(struct zram *zram, u32 index, size_t size) 156 { 157 unsigned long flags = zram->table[index].attr.flags >> ZRAM_FLAG_SHIFT; 158 159 zram->table[index].attr.flags = (flags << ZRAM_FLAG_SHIFT) | size; 160 } 161 162 static inline bool slot_allocated(struct zram *zram, u32 index) 163 { 164 return get_slot_size(zram, index) || 165 test_slot_flag(zram, index, ZRAM_SAME) || 166 test_slot_flag(zram, index, ZRAM_WB); 167 } 168 169 static inline void set_slot_comp_priority(struct zram *zram, u32 index, 170 u32 prio) 171 { 172 prio &= ZRAM_COMP_PRIORITY_MASK; 173 /* 174 * Clear previous priority value first, in case if we recompress 175 * further an already recompressed page 176 */ 177 zram->table[index].attr.flags &= ~(ZRAM_COMP_PRIORITY_MASK << 178 ZRAM_COMP_PRIORITY_BIT1); 179 zram->table[index].attr.flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); 180 } 181 182 static inline u32 get_slot_comp_priority(struct zram *zram, u32 index) 183 { 184 u32 prio = zram->table[index].attr.flags >> ZRAM_COMP_PRIORITY_BIT1; 185 186 return prio & ZRAM_COMP_PRIORITY_MASK; 187 } 188 189 static void mark_slot_accessed(struct zram *zram, u32 index) 190 { 191 clear_slot_flag(zram, index, ZRAM_IDLE); 192 clear_slot_flag(zram, index, ZRAM_PP_SLOT); 193 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 194 zram->table[index].attr.ac_time = (u32)ktime_get_boottime_seconds(); 195 #endif 196 } 197 198 static inline void update_used_max(struct zram *zram, const unsigned long pages) 199 { 200 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); 201 202 do { 203 if (cur_max >= pages) 204 return; 205 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, 206 &cur_max, pages)); 207 } 208 209 static bool zram_can_store_page(struct zram *zram) 210 { 211 unsigned long alloced_pages; 212 213 alloced_pages = zs_get_total_pages(zram->mem_pool); 214 update_used_max(zram, alloced_pages); 215 216 return !zram->limit_pages || alloced_pages <= zram->limit_pages; 217 } 218 219 #if PAGE_SIZE != 4096 220 static inline bool is_partial_io(struct bio_vec *bvec) 221 { 222 return bvec->bv_len != PAGE_SIZE; 223 } 224 #define ZRAM_PARTIAL_IO 1 225 #else 226 static inline bool is_partial_io(struct bio_vec *bvec) 227 { 228 return false; 229 } 230 #endif 231 232 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP 233 struct zram_pp_slot { 234 unsigned long index; 235 struct list_head entry; 236 }; 237 238 /* 239 * A post-processing bucket is, essentially, a size class, this defines 240 * the range (in bytes) of pp-slots sizes in particular bucket. 241 */ 242 #define PP_BUCKET_SIZE_RANGE 64 243 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1) 244 245 struct zram_pp_ctl { 246 struct list_head pp_buckets[NUM_PP_BUCKETS]; 247 }; 248 249 static struct zram_pp_ctl *init_pp_ctl(void) 250 { 251 struct zram_pp_ctl *ctl; 252 u32 idx; 253 254 ctl = kmalloc_obj(*ctl); 255 if (!ctl) 256 return NULL; 257 258 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) 259 INIT_LIST_HEAD(&ctl->pp_buckets[idx]); 260 return ctl; 261 } 262 263 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps) 264 { 265 list_del_init(&pps->entry); 266 267 slot_lock(zram, pps->index); 268 clear_slot_flag(zram, pps->index, ZRAM_PP_SLOT); 269 slot_unlock(zram, pps->index); 270 271 kfree(pps); 272 } 273 274 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl) 275 { 276 u32 idx; 277 278 if (!ctl) 279 return; 280 281 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) { 282 while (!list_empty(&ctl->pp_buckets[idx])) { 283 struct zram_pp_slot *pps; 284 285 pps = list_first_entry(&ctl->pp_buckets[idx], 286 struct zram_pp_slot, 287 entry); 288 release_pp_slot(zram, pps); 289 } 290 } 291 292 kfree(ctl); 293 } 294 295 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl, 296 u32 index) 297 { 298 struct zram_pp_slot *pps; 299 u32 bid; 300 301 pps = kmalloc_obj(*pps, GFP_NOIO | __GFP_NOWARN); 302 if (!pps) 303 return false; 304 305 INIT_LIST_HEAD(&pps->entry); 306 pps->index = index; 307 308 bid = get_slot_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; 309 list_add(&pps->entry, &ctl->pp_buckets[bid]); 310 311 set_slot_flag(zram, pps->index, ZRAM_PP_SLOT); 312 return true; 313 } 314 315 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl) 316 { 317 struct zram_pp_slot *pps = NULL; 318 s32 idx = NUM_PP_BUCKETS - 1; 319 320 /* The higher the bucket id the more optimal slot post-processing is */ 321 while (idx >= 0) { 322 pps = list_first_entry_or_null(&ctl->pp_buckets[idx], 323 struct zram_pp_slot, 324 entry); 325 if (pps) 326 break; 327 328 idx--; 329 } 330 return pps; 331 } 332 #endif 333 334 static inline void zram_fill_page(void *ptr, unsigned long len, 335 unsigned long value) 336 { 337 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 338 memset_l(ptr, value, len / sizeof(unsigned long)); 339 } 340 341 static bool page_same_filled(void *ptr, unsigned long *element) 342 { 343 unsigned long *page; 344 unsigned long val; 345 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 346 347 page = (unsigned long *)ptr; 348 val = page[0]; 349 350 if (val != page[last_pos]) 351 return false; 352 353 for (pos = 1; pos < last_pos; pos++) { 354 if (val != page[pos]) 355 return false; 356 } 357 358 *element = val; 359 360 return true; 361 } 362 363 static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, 364 char *buf) 365 { 366 u32 val; 367 struct zram *zram = dev_to_zram(dev); 368 369 guard(rwsem_read)(&zram->dev_lock); 370 val = init_done(zram); 371 372 return sysfs_emit(buf, "%u\n", val); 373 } 374 375 static ssize_t disksize_show(struct device *dev, 376 struct device_attribute *attr, char *buf) 377 { 378 struct zram *zram = dev_to_zram(dev); 379 380 return sysfs_emit(buf, "%llu\n", zram->disksize); 381 } 382 383 static ssize_t mem_limit_store(struct device *dev, 384 struct device_attribute *attr, const char *buf, 385 size_t len) 386 { 387 u64 limit; 388 char *tmp; 389 struct zram *zram = dev_to_zram(dev); 390 391 limit = memparse(buf, &tmp); 392 if (buf == tmp) /* no chars parsed, invalid input */ 393 return -EINVAL; 394 395 guard(rwsem_write)(&zram->dev_lock); 396 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 397 398 return len; 399 } 400 401 static ssize_t mem_used_max_store(struct device *dev, 402 struct device_attribute *attr, 403 const char *buf, size_t len) 404 { 405 int err; 406 unsigned long val; 407 struct zram *zram = dev_to_zram(dev); 408 409 err = kstrtoul(buf, 10, &val); 410 if (err || val != 0) 411 return -EINVAL; 412 413 guard(rwsem_read)(&zram->dev_lock); 414 if (init_done(zram)) { 415 atomic_long_set(&zram->stats.max_used_pages, 416 zs_get_total_pages(zram->mem_pool)); 417 } 418 419 return len; 420 } 421 422 /* 423 * Mark all pages which are older than or equal to cutoff as IDLE. 424 * Callers should hold the zram init lock in read mode 425 */ 426 static void mark_idle(struct zram *zram, ktime_t cutoff) 427 { 428 int is_idle = 1; 429 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 430 int index; 431 432 for (index = 0; index < nr_pages; index++) { 433 /* 434 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no 435 * post-processing (recompress, writeback) happens to the 436 * ZRAM_SAME slot. 437 * 438 * And ZRAM_WB slots simply cannot be ZRAM_IDLE. 439 */ 440 slot_lock(zram, index); 441 if (!slot_allocated(zram, index) || 442 test_slot_flag(zram, index, ZRAM_WB) || 443 test_slot_flag(zram, index, ZRAM_SAME)) { 444 slot_unlock(zram, index); 445 continue; 446 } 447 448 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 449 is_idle = !cutoff || 450 ktime_after(cutoff, zram->table[index].attr.ac_time); 451 #endif 452 if (is_idle) 453 set_slot_flag(zram, index, ZRAM_IDLE); 454 else 455 clear_slot_flag(zram, index, ZRAM_IDLE); 456 slot_unlock(zram, index); 457 } 458 } 459 460 static ssize_t idle_store(struct device *dev, struct device_attribute *attr, 461 const char *buf, size_t len) 462 { 463 struct zram *zram = dev_to_zram(dev); 464 ktime_t cutoff = 0; 465 466 if (!sysfs_streq(buf, "all")) { 467 /* 468 * If it did not parse as 'all' try to treat it as an integer 469 * when we have memory tracking enabled. 470 */ 471 u32 age_sec; 472 473 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && 474 !kstrtouint(buf, 0, &age_sec)) 475 cutoff = ktime_sub((u32)ktime_get_boottime_seconds(), 476 age_sec); 477 else 478 return -EINVAL; 479 } 480 481 guard(rwsem_read)(&zram->dev_lock); 482 if (!init_done(zram)) 483 return -EINVAL; 484 485 /* 486 * A cutoff of 0 marks everything as idle, this is the 487 * "all" behavior. 488 */ 489 mark_idle(zram, cutoff); 490 return len; 491 } 492 493 #ifdef CONFIG_ZRAM_WRITEBACK 494 #define INVALID_BDEV_BLOCK (~0UL) 495 496 static int read_from_zspool_raw(struct zram *zram, struct page *page, 497 u32 index); 498 static int read_from_zspool(struct zram *zram, struct page *page, u32 index); 499 500 struct zram_wb_ctl { 501 /* idle list is accessed only by the writeback task, no concurency */ 502 struct list_head idle_reqs; 503 /* done list is accessed concurrently, protect by done_lock */ 504 struct list_head done_reqs; 505 wait_queue_head_t done_wait; 506 spinlock_t done_lock; 507 atomic_t num_inflight; 508 struct rcu_head rcu; 509 }; 510 511 struct zram_wb_req { 512 unsigned long blk_idx; 513 struct page *page; 514 struct zram_pp_slot *pps; 515 struct bio_vec bio_vec; 516 struct bio bio; 517 518 struct list_head entry; 519 }; 520 521 struct zram_rb_req { 522 struct work_struct work; 523 struct zram *zram; 524 struct page *page; 525 /* The read bio for backing device */ 526 struct bio *bio; 527 unsigned long blk_idx; 528 union { 529 /* The original bio to complete (async read) */ 530 struct bio *parent; 531 /* error status (sync read) */ 532 int error; 533 }; 534 u32 index; 535 }; 536 537 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 538 static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr, 539 char *buf) 540 { 541 struct zram *zram = dev_to_zram(dev); 542 ssize_t ret; 543 544 guard(rwsem_read)(&zram->dev_lock); 545 ret = sysfs_emit(buf, 546 "%8llu %8llu %8llu\n", 547 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 548 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), 549 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); 550 551 return ret; 552 } 553 554 static ssize_t compressed_writeback_store(struct device *dev, 555 struct device_attribute *attr, 556 const char *buf, size_t len) 557 { 558 struct zram *zram = dev_to_zram(dev); 559 bool val; 560 561 if (kstrtobool(buf, &val)) 562 return -EINVAL; 563 564 guard(rwsem_write)(&zram->dev_lock); 565 if (init_done(zram)) { 566 return -EBUSY; 567 } 568 569 zram->compressed_wb = val; 570 571 return len; 572 } 573 574 static ssize_t compressed_writeback_show(struct device *dev, 575 struct device_attribute *attr, 576 char *buf) 577 { 578 bool val; 579 struct zram *zram = dev_to_zram(dev); 580 581 guard(rwsem_read)(&zram->dev_lock); 582 val = zram->compressed_wb; 583 584 return sysfs_emit(buf, "%d\n", val); 585 } 586 587 static ssize_t writeback_limit_enable_store(struct device *dev, 588 struct device_attribute *attr, 589 const char *buf, size_t len) 590 { 591 struct zram *zram = dev_to_zram(dev); 592 u64 val; 593 594 if (kstrtoull(buf, 10, &val)) 595 return -EINVAL; 596 597 guard(rwsem_write)(&zram->dev_lock); 598 zram->wb_limit_enable = val; 599 600 return len; 601 } 602 603 static ssize_t writeback_limit_enable_show(struct device *dev, 604 struct device_attribute *attr, 605 char *buf) 606 { 607 bool val; 608 struct zram *zram = dev_to_zram(dev); 609 610 guard(rwsem_read)(&zram->dev_lock); 611 val = zram->wb_limit_enable; 612 613 return sysfs_emit(buf, "%d\n", val); 614 } 615 616 static ssize_t writeback_limit_store(struct device *dev, 617 struct device_attribute *attr, 618 const char *buf, size_t len) 619 { 620 struct zram *zram = dev_to_zram(dev); 621 u64 val; 622 623 if (kstrtoull(buf, 10, &val)) 624 return -EINVAL; 625 626 /* 627 * When the page size is greater than 4KB, if bd_wb_limit is set to 628 * a value that is not page - size aligned, it will cause value 629 * wrapping. For example, when the page size is set to 16KB and 630 * bd_wb_limit is set to 3, a single write - back operation will 631 * cause bd_wb_limit to become -1. Even more terrifying is that 632 * bd_wb_limit is an unsigned number. 633 */ 634 val = rounddown(val, PAGE_SIZE / 4096); 635 636 guard(rwsem_write)(&zram->dev_lock); 637 zram->bd_wb_limit = val; 638 639 return len; 640 } 641 642 static ssize_t writeback_limit_show(struct device *dev, 643 struct device_attribute *attr, char *buf) 644 { 645 u64 val; 646 struct zram *zram = dev_to_zram(dev); 647 648 guard(rwsem_read)(&zram->dev_lock); 649 val = zram->bd_wb_limit; 650 651 return sysfs_emit(buf, "%llu\n", val); 652 } 653 654 static ssize_t writeback_batch_size_store(struct device *dev, 655 struct device_attribute *attr, 656 const char *buf, size_t len) 657 { 658 struct zram *zram = dev_to_zram(dev); 659 u32 val; 660 661 if (kstrtouint(buf, 10, &val)) 662 return -EINVAL; 663 664 if (!val) 665 return -EINVAL; 666 667 guard(rwsem_write)(&zram->dev_lock); 668 zram->wb_batch_size = val; 669 670 return len; 671 } 672 673 static ssize_t writeback_batch_size_show(struct device *dev, 674 struct device_attribute *attr, 675 char *buf) 676 { 677 u32 val; 678 struct zram *zram = dev_to_zram(dev); 679 680 guard(rwsem_read)(&zram->dev_lock); 681 val = zram->wb_batch_size; 682 683 return sysfs_emit(buf, "%u\n", val); 684 } 685 686 static void reset_bdev(struct zram *zram) 687 { 688 if (!zram->backing_dev) 689 return; 690 691 /* hope filp_close flush all of IO */ 692 filp_close(zram->backing_dev, NULL); 693 zram->backing_dev = NULL; 694 zram->bdev = NULL; 695 zram->disk->fops = &zram_devops; 696 kvfree(zram->bitmap); 697 zram->bitmap = NULL; 698 } 699 700 static ssize_t backing_dev_show(struct device *dev, 701 struct device_attribute *attr, char *buf) 702 { 703 struct file *file; 704 struct zram *zram = dev_to_zram(dev); 705 char *p; 706 ssize_t ret; 707 708 guard(rwsem_read)(&zram->dev_lock); 709 file = zram->backing_dev; 710 if (!file) { 711 memcpy(buf, "none\n", 5); 712 return 5; 713 } 714 715 p = file_path(file, buf, PAGE_SIZE - 1); 716 if (IS_ERR(p)) 717 return PTR_ERR(p); 718 719 ret = strlen(p); 720 memmove(buf, p, ret); 721 buf[ret++] = '\n'; 722 return ret; 723 } 724 725 static ssize_t backing_dev_store(struct device *dev, 726 struct device_attribute *attr, const char *buf, 727 size_t len) 728 { 729 char *file_name; 730 size_t sz; 731 struct file *backing_dev = NULL; 732 struct inode *inode; 733 unsigned int bitmap_sz; 734 unsigned long nr_pages, *bitmap = NULL; 735 int err; 736 struct zram *zram = dev_to_zram(dev); 737 738 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 739 if (!file_name) 740 return -ENOMEM; 741 742 guard(rwsem_write)(&zram->dev_lock); 743 if (init_done(zram)) { 744 pr_info("Can't setup backing device for initialized device\n"); 745 err = -EBUSY; 746 goto out; 747 } 748 749 strscpy(file_name, buf, PATH_MAX); 750 /* ignore trailing newline */ 751 sz = strlen(file_name); 752 if (sz > 0 && file_name[sz - 1] == '\n') 753 file_name[sz - 1] = 0x00; 754 755 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0); 756 if (IS_ERR(backing_dev)) { 757 err = PTR_ERR(backing_dev); 758 backing_dev = NULL; 759 goto out; 760 } 761 762 inode = backing_dev->f_mapping->host; 763 764 /* Support only block device in this moment */ 765 if (!S_ISBLK(inode->i_mode)) { 766 err = -ENOTBLK; 767 goto out; 768 } 769 770 nr_pages = i_size_read(inode) >> PAGE_SHIFT; 771 /* Refuse to use zero sized device (also prevents self reference) */ 772 if (!nr_pages) { 773 err = -EINVAL; 774 goto out; 775 } 776 777 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 778 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 779 if (!bitmap) { 780 err = -ENOMEM; 781 goto out; 782 } 783 784 reset_bdev(zram); 785 786 zram->bdev = I_BDEV(inode); 787 zram->backing_dev = backing_dev; 788 zram->bitmap = bitmap; 789 zram->nr_pages = nr_pages; 790 791 pr_info("setup backing device %s\n", file_name); 792 kfree(file_name); 793 794 return len; 795 out: 796 kvfree(bitmap); 797 798 if (backing_dev) 799 filp_close(backing_dev, NULL); 800 801 kfree(file_name); 802 803 return err; 804 } 805 806 static unsigned long zram_reserve_bdev_block(struct zram *zram) 807 { 808 unsigned long blk_idx; 809 810 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0); 811 if (blk_idx == zram->nr_pages) 812 return INVALID_BDEV_BLOCK; 813 814 set_bit(blk_idx, zram->bitmap); 815 atomic64_inc(&zram->stats.bd_count); 816 return blk_idx; 817 } 818 819 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) 820 { 821 int was_set; 822 823 was_set = test_and_clear_bit(blk_idx, zram->bitmap); 824 WARN_ON_ONCE(!was_set); 825 atomic64_dec(&zram->stats.bd_count); 826 } 827 828 static void release_wb_req(struct zram_wb_req *req) 829 { 830 __free_page(req->page); 831 kfree(req); 832 } 833 834 static void release_wb_ctl(struct zram_wb_ctl *wb_ctl) 835 { 836 if (!wb_ctl) 837 return; 838 839 /* We should never have inflight requests at this point */ 840 WARN_ON(atomic_read(&wb_ctl->num_inflight)); 841 WARN_ON(!list_empty(&wb_ctl->done_reqs)); 842 843 while (!list_empty(&wb_ctl->idle_reqs)) { 844 struct zram_wb_req *req; 845 846 req = list_first_entry(&wb_ctl->idle_reqs, 847 struct zram_wb_req, entry); 848 list_del(&req->entry); 849 release_wb_req(req); 850 } 851 852 kfree_rcu(wb_ctl, rcu); 853 } 854 855 static struct zram_wb_ctl *init_wb_ctl(struct zram *zram) 856 { 857 struct zram_wb_ctl *wb_ctl; 858 int i; 859 860 wb_ctl = kmalloc_obj(*wb_ctl); 861 if (!wb_ctl) 862 return NULL; 863 864 INIT_LIST_HEAD(&wb_ctl->idle_reqs); 865 INIT_LIST_HEAD(&wb_ctl->done_reqs); 866 atomic_set(&wb_ctl->num_inflight, 0); 867 init_waitqueue_head(&wb_ctl->done_wait); 868 spin_lock_init(&wb_ctl->done_lock); 869 870 for (i = 0; i < zram->wb_batch_size; i++) { 871 struct zram_wb_req *req; 872 873 /* 874 * This is fatal condition only if we couldn't allocate 875 * any requests at all. Otherwise we just work with the 876 * requests that we have successfully allocated, so that 877 * writeback can still proceed, even if there is only one 878 * request on the idle list. 879 */ 880 req = kzalloc_obj(*req, GFP_KERNEL | __GFP_NOWARN); 881 if (!req) 882 break; 883 884 req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN); 885 if (!req->page) { 886 kfree(req); 887 break; 888 } 889 890 list_add(&req->entry, &wb_ctl->idle_reqs); 891 } 892 893 /* We couldn't allocate any requests, so writeabck is not possible */ 894 if (list_empty(&wb_ctl->idle_reqs)) 895 goto release_wb_ctl; 896 897 return wb_ctl; 898 899 release_wb_ctl: 900 release_wb_ctl(wb_ctl); 901 return NULL; 902 } 903 904 static void zram_account_writeback_rollback(struct zram *zram) 905 { 906 lockdep_assert_held_write(&zram->dev_lock); 907 908 if (zram->wb_limit_enable) 909 zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12); 910 } 911 912 static void zram_account_writeback_submit(struct zram *zram) 913 { 914 lockdep_assert_held_write(&zram->dev_lock); 915 916 if (zram->wb_limit_enable && zram->bd_wb_limit > 0) 917 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); 918 } 919 920 static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) 921 { 922 u32 index = req->pps->index; 923 int err; 924 925 err = blk_status_to_errno(req->bio.bi_status); 926 if (err) { 927 /* 928 * Failed wb requests should not be accounted in wb_limit 929 * (if enabled). 930 */ 931 zram_account_writeback_rollback(zram); 932 zram_release_bdev_block(zram, req->blk_idx); 933 return err; 934 } 935 936 atomic64_inc(&zram->stats.bd_writes); 937 slot_lock(zram, index); 938 /* 939 * We release slot lock during writeback so slot can change under us: 940 * slot_free() or slot_free() and zram_write_page(). In both cases 941 * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can 942 * set ZRAM_PP_SLOT on such slots until current post-processing 943 * finishes. 944 */ 945 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) { 946 zram_release_bdev_block(zram, req->blk_idx); 947 goto out; 948 } 949 950 clear_slot_flag(zram, index, ZRAM_IDLE); 951 if (test_slot_flag(zram, index, ZRAM_HUGE)) 952 atomic64_dec(&zram->stats.huge_pages); 953 atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size); 954 zs_free(zram->mem_pool, get_slot_handle(zram, index)); 955 set_slot_handle(zram, index, req->blk_idx); 956 set_slot_flag(zram, index, ZRAM_WB); 957 958 out: 959 slot_unlock(zram, index); 960 return 0; 961 } 962 963 static void zram_writeback_endio(struct bio *bio) 964 { 965 struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio); 966 struct zram_wb_ctl *wb_ctl = bio->bi_private; 967 unsigned long flags; 968 969 rcu_read_lock(); 970 spin_lock_irqsave(&wb_ctl->done_lock, flags); 971 list_add(&req->entry, &wb_ctl->done_reqs); 972 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 973 974 wake_up(&wb_ctl->done_wait); 975 rcu_read_unlock(); 976 } 977 978 static void zram_submit_wb_request(struct zram *zram, 979 struct zram_wb_ctl *wb_ctl, 980 struct zram_wb_req *req) 981 { 982 /* 983 * wb_limit (if enabled) should be adjusted before submission, 984 * so that we don't over-submit. 985 */ 986 zram_account_writeback_submit(zram); 987 atomic_inc(&wb_ctl->num_inflight); 988 req->bio.bi_private = wb_ctl; 989 submit_bio(&req->bio); 990 } 991 992 static int zram_complete_done_reqs(struct zram *zram, 993 struct zram_wb_ctl *wb_ctl) 994 { 995 struct zram_wb_req *req; 996 unsigned long flags; 997 int ret = 0, err; 998 999 while (atomic_read(&wb_ctl->num_inflight) > 0) { 1000 spin_lock_irqsave(&wb_ctl->done_lock, flags); 1001 req = list_first_entry_or_null(&wb_ctl->done_reqs, 1002 struct zram_wb_req, entry); 1003 if (req) 1004 list_del(&req->entry); 1005 spin_unlock_irqrestore(&wb_ctl->done_lock, flags); 1006 1007 /* ->num_inflight > 0 doesn't mean we have done requests */ 1008 if (!req) 1009 break; 1010 1011 err = zram_writeback_complete(zram, req); 1012 if (err) 1013 ret = err; 1014 1015 atomic_dec(&wb_ctl->num_inflight); 1016 release_pp_slot(zram, req->pps); 1017 req->pps = NULL; 1018 1019 list_add(&req->entry, &wb_ctl->idle_reqs); 1020 } 1021 1022 return ret; 1023 } 1024 1025 static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl) 1026 { 1027 struct zram_wb_req *req; 1028 1029 req = list_first_entry_or_null(&wb_ctl->idle_reqs, 1030 struct zram_wb_req, entry); 1031 if (req) 1032 list_del(&req->entry); 1033 return req; 1034 } 1035 1036 static int zram_writeback_slots(struct zram *zram, 1037 struct zram_pp_ctl *ctl, 1038 struct zram_wb_ctl *wb_ctl) 1039 { 1040 unsigned long blk_idx = INVALID_BDEV_BLOCK; 1041 struct zram_wb_req *req = NULL; 1042 struct zram_pp_slot *pps; 1043 int ret = 0, err = 0; 1044 u32 index = 0; 1045 1046 while ((pps = select_pp_slot(ctl))) { 1047 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 1048 ret = -EIO; 1049 break; 1050 } 1051 1052 while (!req) { 1053 req = zram_select_idle_req(wb_ctl); 1054 if (req) 1055 break; 1056 1057 wait_event(wb_ctl->done_wait, 1058 !list_empty(&wb_ctl->done_reqs)); 1059 1060 err = zram_complete_done_reqs(zram, wb_ctl); 1061 /* 1062 * BIO errors are not fatal, we continue and simply 1063 * attempt to writeback the remaining objects (pages). 1064 * At the same time we need to signal user-space that 1065 * some writes (at least one, but also could be all of 1066 * them) were not successful and we do so by returning 1067 * the most recent BIO error. 1068 */ 1069 if (err) 1070 ret = err; 1071 } 1072 1073 if (blk_idx == INVALID_BDEV_BLOCK) { 1074 blk_idx = zram_reserve_bdev_block(zram); 1075 if (blk_idx == INVALID_BDEV_BLOCK) { 1076 ret = -ENOSPC; 1077 break; 1078 } 1079 } 1080 1081 index = pps->index; 1082 slot_lock(zram, index); 1083 /* 1084 * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so 1085 * slots can change in the meantime. If slots are accessed or 1086 * freed they lose ZRAM_PP_SLOT flag and hence we don't 1087 * post-process them. 1088 */ 1089 if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) 1090 goto next; 1091 if (zram->compressed_wb) 1092 err = read_from_zspool_raw(zram, req->page, index); 1093 else 1094 err = read_from_zspool(zram, req->page, index); 1095 if (err) 1096 goto next; 1097 slot_unlock(zram, index); 1098 1099 /* 1100 * From now on pp-slot is owned by the req, remove it from 1101 * its pp bucket. 1102 */ 1103 list_del_init(&pps->entry); 1104 1105 req->blk_idx = blk_idx; 1106 req->pps = pps; 1107 bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE); 1108 req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1109 req->bio.bi_end_io = zram_writeback_endio; 1110 __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0); 1111 1112 zram_submit_wb_request(zram, wb_ctl, req); 1113 blk_idx = INVALID_BDEV_BLOCK; 1114 req = NULL; 1115 cond_resched(); 1116 continue; 1117 1118 next: 1119 slot_unlock(zram, index); 1120 release_pp_slot(zram, pps); 1121 } 1122 1123 /* 1124 * Selected idle req, but never submitted it due to some error or 1125 * wb limit. 1126 */ 1127 if (req) 1128 release_wb_req(req); 1129 1130 while (atomic_read(&wb_ctl->num_inflight) > 0) { 1131 wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs)); 1132 err = zram_complete_done_reqs(zram, wb_ctl); 1133 if (err) 1134 ret = err; 1135 } 1136 1137 return ret; 1138 } 1139 1140 #define PAGE_WRITEBACK 0 1141 #define HUGE_WRITEBACK (1 << 0) 1142 #define IDLE_WRITEBACK (1 << 1) 1143 #define INCOMPRESSIBLE_WRITEBACK (1 << 2) 1144 1145 static int parse_page_index(char *val, unsigned long nr_pages, 1146 unsigned long *lo, unsigned long *hi) 1147 { 1148 int ret; 1149 1150 ret = kstrtoul(val, 10, lo); 1151 if (ret) 1152 return ret; 1153 if (*lo >= nr_pages) 1154 return -ERANGE; 1155 *hi = *lo + 1; 1156 return 0; 1157 } 1158 1159 static int parse_page_indexes(char *val, unsigned long nr_pages, 1160 unsigned long *lo, unsigned long *hi) 1161 { 1162 char *delim; 1163 int ret; 1164 1165 delim = strchr(val, '-'); 1166 if (!delim) 1167 return -EINVAL; 1168 1169 *delim = 0x00; 1170 ret = kstrtoul(val, 10, lo); 1171 if (ret) 1172 return ret; 1173 if (*lo >= nr_pages) 1174 return -ERANGE; 1175 1176 ret = kstrtoul(delim + 1, 10, hi); 1177 if (ret) 1178 return ret; 1179 if (*hi >= nr_pages || *lo > *hi) 1180 return -ERANGE; 1181 *hi += 1; 1182 return 0; 1183 } 1184 1185 static int parse_mode(char *val, u32 *mode) 1186 { 1187 *mode = 0; 1188 1189 if (!strcmp(val, "idle")) 1190 *mode = IDLE_WRITEBACK; 1191 if (!strcmp(val, "huge")) 1192 *mode = HUGE_WRITEBACK; 1193 if (!strcmp(val, "huge_idle")) 1194 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK; 1195 if (!strcmp(val, "incompressible")) 1196 *mode = INCOMPRESSIBLE_WRITEBACK; 1197 1198 if (*mode == 0) 1199 return -EINVAL; 1200 return 0; 1201 } 1202 1203 static void scan_slots_for_writeback(struct zram *zram, u32 mode, 1204 unsigned long lo, unsigned long hi, 1205 struct zram_pp_ctl *ctl) 1206 { 1207 u32 index = lo; 1208 1209 while (index < hi) { 1210 bool ok = true; 1211 1212 slot_lock(zram, index); 1213 if (!slot_allocated(zram, index)) 1214 goto next; 1215 1216 if (test_slot_flag(zram, index, ZRAM_WB) || 1217 test_slot_flag(zram, index, ZRAM_SAME)) 1218 goto next; 1219 1220 if (mode & IDLE_WRITEBACK && 1221 !test_slot_flag(zram, index, ZRAM_IDLE)) 1222 goto next; 1223 if (mode & HUGE_WRITEBACK && 1224 !test_slot_flag(zram, index, ZRAM_HUGE)) 1225 goto next; 1226 if (mode & INCOMPRESSIBLE_WRITEBACK && 1227 !test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 1228 goto next; 1229 1230 ok = place_pp_slot(zram, ctl, index); 1231 next: 1232 slot_unlock(zram, index); 1233 if (!ok) 1234 break; 1235 index++; 1236 } 1237 } 1238 1239 static ssize_t writeback_store(struct device *dev, 1240 struct device_attribute *attr, 1241 const char *buf, size_t len) 1242 { 1243 struct zram *zram = dev_to_zram(dev); 1244 u64 nr_pages = zram->disksize >> PAGE_SHIFT; 1245 unsigned long lo = 0, hi = nr_pages; 1246 struct zram_pp_ctl *pp_ctl = NULL; 1247 struct zram_wb_ctl *wb_ctl = NULL; 1248 char *args, *param, *val; 1249 ssize_t ret = len; 1250 int err, mode = 0; 1251 1252 guard(rwsem_write)(&zram->dev_lock); 1253 if (!init_done(zram)) 1254 return -EINVAL; 1255 1256 if (!zram->backing_dev) 1257 return -ENODEV; 1258 1259 pp_ctl = init_pp_ctl(); 1260 if (!pp_ctl) 1261 return -ENOMEM; 1262 1263 wb_ctl = init_wb_ctl(zram); 1264 if (!wb_ctl) { 1265 ret = -ENOMEM; 1266 goto out; 1267 } 1268 1269 args = skip_spaces(buf); 1270 while (*args) { 1271 args = next_arg(args, ¶m, &val); 1272 1273 /* 1274 * Workaround to support the old writeback interface. 1275 * 1276 * The old writeback interface has a minor inconsistency and 1277 * requires key=value only for page_index parameter, while the 1278 * writeback mode is a valueless parameter. 1279 * 1280 * This is not the case anymore and now all parameters are 1281 * required to have values, however, we need to support the 1282 * legacy writeback interface format so we check if we can 1283 * recognize a valueless parameter as the (legacy) writeback 1284 * mode. 1285 */ 1286 if (!val || !*val) { 1287 err = parse_mode(param, &mode); 1288 if (err) { 1289 ret = err; 1290 goto out; 1291 } 1292 1293 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1294 break; 1295 } 1296 1297 if (!strcmp(param, "type")) { 1298 err = parse_mode(val, &mode); 1299 if (err) { 1300 ret = err; 1301 goto out; 1302 } 1303 1304 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1305 break; 1306 } 1307 1308 if (!strcmp(param, "page_index")) { 1309 err = parse_page_index(val, nr_pages, &lo, &hi); 1310 if (err) { 1311 ret = err; 1312 goto out; 1313 } 1314 1315 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1316 continue; 1317 } 1318 1319 if (!strcmp(param, "page_indexes")) { 1320 err = parse_page_indexes(val, nr_pages, &lo, &hi); 1321 if (err) { 1322 ret = err; 1323 goto out; 1324 } 1325 1326 scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); 1327 continue; 1328 } 1329 } 1330 1331 err = zram_writeback_slots(zram, pp_ctl, wb_ctl); 1332 if (err) 1333 ret = err; 1334 1335 out: 1336 release_pp_ctl(zram, pp_ctl); 1337 release_wb_ctl(wb_ctl); 1338 1339 return ret; 1340 } 1341 1342 static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index) 1343 { 1344 struct zcomp_strm *zstrm; 1345 unsigned int size; 1346 int ret, prio; 1347 void *src; 1348 1349 slot_lock(zram, index); 1350 /* Since slot was unlocked we need to make sure it's still ZRAM_WB */ 1351 if (!test_slot_flag(zram, index, ZRAM_WB)) { 1352 slot_unlock(zram, index); 1353 /* We read some stale data, zero it out */ 1354 memset_page(page, 0, 0, PAGE_SIZE); 1355 return -EIO; 1356 } 1357 1358 if (test_slot_flag(zram, index, ZRAM_HUGE)) { 1359 slot_unlock(zram, index); 1360 return 0; 1361 } 1362 1363 size = get_slot_size(zram, index); 1364 prio = get_slot_comp_priority(zram, index); 1365 1366 zstrm = zcomp_stream_get(zram->comps[prio]); 1367 src = kmap_local_page(page); 1368 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, 1369 zstrm->local_copy); 1370 if (!ret) 1371 copy_page(src, zstrm->local_copy); 1372 kunmap_local(src); 1373 zcomp_stream_put(zstrm); 1374 slot_unlock(zram, index); 1375 1376 return ret; 1377 } 1378 1379 static void zram_deferred_decompress(struct work_struct *w) 1380 { 1381 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1382 struct page *page = bio_first_page_all(req->bio); 1383 struct zram *zram = req->zram; 1384 u32 index = req->index; 1385 int ret; 1386 1387 ret = decompress_bdev_page(zram, page, index); 1388 if (ret) 1389 req->parent->bi_status = BLK_STS_IOERR; 1390 1391 /* Decrement parent's ->remaining */ 1392 bio_endio(req->parent); 1393 bio_put(req->bio); 1394 kfree(req); 1395 } 1396 1397 static void zram_async_read_endio(struct bio *bio) 1398 { 1399 struct zram_rb_req *req = bio->bi_private; 1400 struct zram *zram = req->zram; 1401 1402 if (bio->bi_status) { 1403 req->parent->bi_status = bio->bi_status; 1404 bio_endio(req->parent); 1405 bio_put(bio); 1406 kfree(req); 1407 return; 1408 } 1409 1410 /* 1411 * NOTE: zram_async_read_endio() is not exactly right place for this. 1412 * Ideally, we need to do it after ZRAM_WB check, but this requires 1413 * us to use wq path even on systems that don't enable compressed 1414 * writeback, because we cannot take slot-lock in the current context. 1415 * 1416 * Keep the existing behavior for now. 1417 */ 1418 if (zram->compressed_wb == false) { 1419 /* No decompression needed, complete the parent IO */ 1420 bio_endio(req->parent); 1421 bio_put(bio); 1422 kfree(req); 1423 return; 1424 } 1425 1426 /* 1427 * zram decompression is sleepable, so we need to deffer it to 1428 * a preemptible context. 1429 */ 1430 INIT_WORK(&req->work, zram_deferred_decompress); 1431 queue_work(system_highpri_wq, &req->work); 1432 } 1433 1434 static int read_from_bdev_async(struct zram *zram, struct page *page, 1435 u32 index, unsigned long blk_idx, 1436 struct bio *parent) 1437 { 1438 struct zram_rb_req *req; 1439 struct bio *bio; 1440 1441 req = kmalloc_obj(*req, GFP_NOIO); 1442 if (!req) 1443 return -ENOMEM; 1444 1445 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); 1446 if (!bio) { 1447 kfree(req); 1448 return -ENOMEM; 1449 } 1450 1451 req->zram = zram; 1452 req->index = index; 1453 req->blk_idx = blk_idx; 1454 req->bio = bio; 1455 req->parent = parent; 1456 1457 bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 1458 bio->bi_private = req; 1459 bio->bi_end_io = zram_async_read_endio; 1460 1461 __bio_add_page(bio, page, PAGE_SIZE, 0); 1462 bio_inc_remaining(parent); 1463 submit_bio(bio); 1464 1465 return 0; 1466 } 1467 1468 static void zram_sync_read(struct work_struct *w) 1469 { 1470 struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1471 struct bio_vec bv; 1472 struct bio bio; 1473 1474 bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ); 1475 bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1476 __bio_add_page(&bio, req->page, PAGE_SIZE, 0); 1477 req->error = submit_bio_wait(&bio); 1478 } 1479 1480 /* 1481 * Block layer want one ->submit_bio to be active at a time, so if we use 1482 * chained IO with parent IO in same context, it's a deadlock. To avoid that, 1483 * use a worker thread context. 1484 */ 1485 static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index, 1486 unsigned long blk_idx) 1487 { 1488 struct zram_rb_req req; 1489 1490 req.page = page; 1491 req.zram = zram; 1492 req.blk_idx = blk_idx; 1493 1494 INIT_WORK_ONSTACK(&req.work, zram_sync_read); 1495 queue_work(system_dfl_wq, &req.work); 1496 flush_work(&req.work); 1497 destroy_work_on_stack(&req.work); 1498 1499 if (req.error || zram->compressed_wb == false) 1500 return req.error; 1501 1502 return decompress_bdev_page(zram, page, index); 1503 } 1504 1505 static int read_from_bdev(struct zram *zram, struct page *page, u32 index, 1506 unsigned long blk_idx, struct bio *parent) 1507 { 1508 atomic64_inc(&zram->stats.bd_reads); 1509 if (!parent) { 1510 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) 1511 return -EIO; 1512 return read_from_bdev_sync(zram, page, index, blk_idx); 1513 } 1514 return read_from_bdev_async(zram, page, index, blk_idx, parent); 1515 } 1516 #else 1517 static inline void reset_bdev(struct zram *zram) {}; 1518 static int read_from_bdev(struct zram *zram, struct page *page, u32 index, 1519 unsigned long blk_idx, struct bio *parent) 1520 { 1521 return -EIO; 1522 } 1523 1524 static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) 1525 { 1526 } 1527 #endif 1528 1529 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 1530 1531 static struct dentry *zram_debugfs_root; 1532 1533 static void zram_debugfs_create(void) 1534 { 1535 zram_debugfs_root = debugfs_create_dir("zram", NULL); 1536 } 1537 1538 static void zram_debugfs_destroy(void) 1539 { 1540 debugfs_remove_recursive(zram_debugfs_root); 1541 } 1542 1543 static ssize_t read_block_state(struct file *file, char __user *buf, 1544 size_t count, loff_t *ppos) 1545 { 1546 char *kbuf; 1547 ssize_t index, written = 0; 1548 struct zram *zram = file->private_data; 1549 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 1550 1551 kbuf = kvmalloc(count, GFP_KERNEL); 1552 if (!kbuf) 1553 return -ENOMEM; 1554 1555 guard(rwsem_read)(&zram->dev_lock); 1556 if (!init_done(zram)) { 1557 kvfree(kbuf); 1558 return -EINVAL; 1559 } 1560 1561 for (index = *ppos; index < nr_pages; index++) { 1562 int copied; 1563 1564 slot_lock(zram, index); 1565 if (!slot_allocated(zram, index)) 1566 goto next; 1567 1568 copied = snprintf(kbuf + written, count, 1569 "%12zd %12u.%06d %c%c%c%c%c%c\n", 1570 index, zram->table[index].attr.ac_time, 0, 1571 test_slot_flag(zram, index, ZRAM_SAME) ? 's' : '.', 1572 test_slot_flag(zram, index, ZRAM_WB) ? 'w' : '.', 1573 test_slot_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', 1574 test_slot_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', 1575 get_slot_comp_priority(zram, index) ? 'r' : '.', 1576 test_slot_flag(zram, index, 1577 ZRAM_INCOMPRESSIBLE) ? 'n' : '.'); 1578 1579 if (count <= copied) { 1580 slot_unlock(zram, index); 1581 break; 1582 } 1583 written += copied; 1584 count -= copied; 1585 next: 1586 slot_unlock(zram, index); 1587 *ppos += 1; 1588 } 1589 1590 if (copy_to_user(buf, kbuf, written)) 1591 written = -EFAULT; 1592 kvfree(kbuf); 1593 1594 return written; 1595 } 1596 1597 static const struct file_operations proc_zram_block_state_op = { 1598 .open = simple_open, 1599 .read = read_block_state, 1600 .llseek = default_llseek, 1601 }; 1602 1603 static void zram_debugfs_register(struct zram *zram) 1604 { 1605 if (!zram_debugfs_root) 1606 return; 1607 1608 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, 1609 zram_debugfs_root); 1610 debugfs_create_file("block_state", 0400, zram->debugfs_dir, 1611 zram, &proc_zram_block_state_op); 1612 } 1613 1614 static void zram_debugfs_unregister(struct zram *zram) 1615 { 1616 debugfs_remove_recursive(zram->debugfs_dir); 1617 } 1618 #else 1619 static void zram_debugfs_create(void) {}; 1620 static void zram_debugfs_destroy(void) {}; 1621 static void zram_debugfs_register(struct zram *zram) {}; 1622 static void zram_debugfs_unregister(struct zram *zram) {}; 1623 #endif 1624 1625 /* Only algo parameter given, lookup by algo name */ 1626 static int lookup_algo_priority(struct zram *zram, const char *algo, 1627 u32 min_prio) 1628 { 1629 s32 prio; 1630 1631 for (prio = min_prio; prio < ZRAM_MAX_COMPS; prio++) { 1632 if (!zram->comp_algs[prio]) 1633 continue; 1634 1635 if (!strcmp(zram->comp_algs[prio], algo)) 1636 return prio; 1637 } 1638 1639 return -EINVAL; 1640 } 1641 1642 /* Both algo and priority parameters given, validate them */ 1643 static int validate_algo_priority(struct zram *zram, const char *algo, u32 prio) 1644 { 1645 if (prio >= ZRAM_MAX_COMPS) 1646 return -EINVAL; 1647 /* No algo at given priority */ 1648 if (!zram->comp_algs[prio]) 1649 return -EINVAL; 1650 /* A different algo at given priority */ 1651 if (strcmp(zram->comp_algs[prio], algo)) 1652 return -EINVAL; 1653 return 0; 1654 } 1655 1656 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) 1657 { 1658 zram->comp_algs[prio] = alg; 1659 } 1660 1661 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) 1662 { 1663 const char *alg; 1664 size_t sz; 1665 1666 sz = strlen(buf); 1667 if (sz >= ZRAM_MAX_ALGO_NAME_SZ) 1668 return -E2BIG; 1669 1670 alg = zcomp_lookup_backend_name(buf); 1671 if (!alg) 1672 return -EINVAL; 1673 1674 guard(rwsem_write)(&zram->dev_lock); 1675 if (init_done(zram)) { 1676 pr_info("Can't change algorithm for initialized device\n"); 1677 return -EBUSY; 1678 } 1679 1680 comp_algorithm_set(zram, prio, alg); 1681 return 0; 1682 } 1683 1684 static void comp_params_reset(struct zram *zram, u32 prio) 1685 { 1686 struct zcomp_params *params = &zram->params[prio]; 1687 1688 vfree(params->dict); 1689 params->level = ZCOMP_PARAM_NOT_SET; 1690 params->deflate.winbits = ZCOMP_PARAM_NOT_SET; 1691 params->dict_sz = 0; 1692 params->dict = NULL; 1693 } 1694 1695 static int comp_params_store(struct zram *zram, u32 prio, s32 level, 1696 const char *dict_path, 1697 struct deflate_params *deflate_params) 1698 { 1699 ssize_t sz = 0; 1700 1701 comp_params_reset(zram, prio); 1702 1703 if (dict_path) { 1704 sz = kernel_read_file_from_path(dict_path, 0, 1705 &zram->params[prio].dict, 1706 INT_MAX, 1707 NULL, 1708 READING_POLICY); 1709 if (sz < 0) 1710 return -EINVAL; 1711 } 1712 1713 zram->params[prio].dict_sz = sz; 1714 zram->params[prio].level = level; 1715 zram->params[prio].deflate.winbits = deflate_params->winbits; 1716 return 0; 1717 } 1718 1719 static ssize_t algorithm_params_store(struct device *dev, 1720 struct device_attribute *attr, 1721 const char *buf, 1722 size_t len) 1723 { 1724 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET; 1725 char *args, *param, *val, *algo = NULL, *dict_path = NULL; 1726 struct deflate_params deflate_params; 1727 struct zram *zram = dev_to_zram(dev); 1728 bool prio_param = false; 1729 int ret; 1730 1731 deflate_params.winbits = ZCOMP_PARAM_NOT_SET; 1732 1733 args = skip_spaces(buf); 1734 while (*args) { 1735 args = next_arg(args, ¶m, &val); 1736 1737 if (!val || !*val) 1738 return -EINVAL; 1739 1740 if (!strcmp(param, "priority")) { 1741 prio_param = true; 1742 ret = kstrtoint(val, 10, &prio); 1743 if (ret) 1744 return ret; 1745 continue; 1746 } 1747 1748 if (!strcmp(param, "level")) { 1749 ret = kstrtoint(val, 10, &level); 1750 if (ret) 1751 return ret; 1752 continue; 1753 } 1754 1755 if (!strcmp(param, "algo")) { 1756 algo = val; 1757 continue; 1758 } 1759 1760 if (!strcmp(param, "dict")) { 1761 dict_path = val; 1762 continue; 1763 } 1764 1765 if (!strcmp(param, "deflate.winbits")) { 1766 ret = kstrtoint(val, 10, &deflate_params.winbits); 1767 if (ret) 1768 return ret; 1769 continue; 1770 } 1771 } 1772 1773 guard(rwsem_write)(&zram->dev_lock); 1774 if (init_done(zram)) 1775 return -EBUSY; 1776 1777 if (prio_param) { 1778 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS) 1779 return -EINVAL; 1780 } 1781 1782 if (algo && prio_param) { 1783 ret = validate_algo_priority(zram, algo, prio); 1784 if (ret) 1785 return ret; 1786 } 1787 1788 if (algo && !prio_param) { 1789 prio = lookup_algo_priority(zram, algo, ZRAM_PRIMARY_COMP); 1790 if (prio < 0) 1791 return -EINVAL; 1792 } 1793 1794 ret = comp_params_store(zram, prio, level, dict_path, &deflate_params); 1795 return ret ? ret : len; 1796 } 1797 1798 static ssize_t comp_algorithm_show(struct device *dev, 1799 struct device_attribute *attr, 1800 char *buf) 1801 { 1802 struct zram *zram = dev_to_zram(dev); 1803 ssize_t sz; 1804 1805 guard(rwsem_read)(&zram->dev_lock); 1806 sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0); 1807 return sz; 1808 } 1809 1810 static ssize_t comp_algorithm_store(struct device *dev, 1811 struct device_attribute *attr, 1812 const char *buf, 1813 size_t len) 1814 { 1815 struct zram *zram = dev_to_zram(dev); 1816 int ret; 1817 1818 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf); 1819 return ret ? ret : len; 1820 } 1821 1822 #ifdef CONFIG_ZRAM_MULTI_COMP 1823 static ssize_t recomp_algorithm_show(struct device *dev, 1824 struct device_attribute *attr, 1825 char *buf) 1826 { 1827 struct zram *zram = dev_to_zram(dev); 1828 ssize_t sz = 0; 1829 u32 prio; 1830 1831 guard(rwsem_read)(&zram->dev_lock); 1832 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 1833 if (!zram->comp_algs[prio]) 1834 continue; 1835 1836 sz += sysfs_emit_at(buf, sz, "#%d: ", prio); 1837 sz += zcomp_available_show(zram->comp_algs[prio], buf, sz); 1838 } 1839 return sz; 1840 } 1841 1842 static ssize_t recomp_algorithm_store(struct device *dev, 1843 struct device_attribute *attr, 1844 const char *buf, 1845 size_t len) 1846 { 1847 struct zram *zram = dev_to_zram(dev); 1848 int prio = ZRAM_SECONDARY_COMP; 1849 char *args, *param, *val; 1850 char *alg = NULL; 1851 int ret; 1852 1853 args = skip_spaces(buf); 1854 while (*args) { 1855 args = next_arg(args, ¶m, &val); 1856 1857 if (!val || !*val) 1858 return -EINVAL; 1859 1860 if (!strcmp(param, "algo")) { 1861 alg = val; 1862 continue; 1863 } 1864 1865 if (!strcmp(param, "priority")) { 1866 ret = kstrtoint(val, 10, &prio); 1867 if (ret) 1868 return ret; 1869 continue; 1870 } 1871 } 1872 1873 if (!alg) 1874 return -EINVAL; 1875 1876 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) 1877 return -EINVAL; 1878 1879 ret = __comp_algorithm_store(zram, prio, alg); 1880 return ret ? ret : len; 1881 } 1882 #endif 1883 1884 static ssize_t compact_store(struct device *dev, struct device_attribute *attr, 1885 const char *buf, size_t len) 1886 { 1887 struct zram *zram = dev_to_zram(dev); 1888 1889 guard(rwsem_read)(&zram->dev_lock); 1890 if (!init_done(zram)) 1891 return -EINVAL; 1892 1893 zs_compact(zram->mem_pool); 1894 1895 return len; 1896 } 1897 1898 static ssize_t io_stat_show(struct device *dev, struct device_attribute *attr, 1899 char *buf) 1900 { 1901 struct zram *zram = dev_to_zram(dev); 1902 ssize_t ret; 1903 1904 guard(rwsem_read)(&zram->dev_lock); 1905 ret = sysfs_emit(buf, 1906 "%8llu %8llu 0 %8llu\n", 1907 (u64)atomic64_read(&zram->stats.failed_reads), 1908 (u64)atomic64_read(&zram->stats.failed_writes), 1909 (u64)atomic64_read(&zram->stats.notify_free)); 1910 1911 return ret; 1912 } 1913 1914 static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr, 1915 char *buf) 1916 { 1917 struct zram *zram = dev_to_zram(dev); 1918 struct zs_pool_stats pool_stats; 1919 u64 orig_size, mem_used = 0; 1920 long max_used; 1921 ssize_t ret; 1922 1923 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 1924 1925 guard(rwsem_read)(&zram->dev_lock); 1926 if (init_done(zram)) { 1927 mem_used = zs_get_total_pages(zram->mem_pool); 1928 zs_pool_stats(zram->mem_pool, &pool_stats); 1929 } 1930 1931 orig_size = atomic64_read(&zram->stats.pages_stored); 1932 max_used = atomic_long_read(&zram->stats.max_used_pages); 1933 1934 ret = sysfs_emit(buf, 1935 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", 1936 orig_size << PAGE_SHIFT, 1937 (u64)atomic64_read(&zram->stats.compr_data_size), 1938 mem_used << PAGE_SHIFT, 1939 zram->limit_pages << PAGE_SHIFT, 1940 max_used << PAGE_SHIFT, 1941 (u64)atomic64_read(&zram->stats.same_pages), 1942 atomic_long_read(&pool_stats.pages_compacted), 1943 (u64)atomic64_read(&zram->stats.huge_pages), 1944 (u64)atomic64_read(&zram->stats.huge_pages_since)); 1945 1946 return ret; 1947 } 1948 1949 static ssize_t debug_stat_show(struct device *dev, 1950 struct device_attribute *attr, char *buf) 1951 { 1952 int version = 1; 1953 struct zram *zram = dev_to_zram(dev); 1954 ssize_t ret; 1955 1956 guard(rwsem_read)(&zram->dev_lock); 1957 ret = sysfs_emit(buf, 1958 "version: %d\n0 %8llu\n", 1959 version, 1960 (u64)atomic64_read(&zram->stats.miss_free)); 1961 1962 return ret; 1963 } 1964 1965 static void zram_meta_free(struct zram *zram, u64 disksize) 1966 { 1967 size_t num_pages = disksize >> PAGE_SHIFT; 1968 size_t index; 1969 1970 if (!zram->table) 1971 return; 1972 1973 /* Free all pages that are still in this zram device */ 1974 for (index = 0; index < num_pages; index++) 1975 slot_free(zram, index); 1976 1977 zs_destroy_pool(zram->mem_pool); 1978 vfree(zram->table); 1979 zram->table = NULL; 1980 } 1981 1982 static bool zram_meta_alloc(struct zram *zram, u64 disksize) 1983 { 1984 size_t num_pages, index; 1985 1986 num_pages = disksize >> PAGE_SHIFT; 1987 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); 1988 if (!zram->table) 1989 return false; 1990 1991 zram->mem_pool = zs_create_pool(zram->disk->disk_name); 1992 if (!zram->mem_pool) { 1993 vfree(zram->table); 1994 zram->table = NULL; 1995 return false; 1996 } 1997 1998 if (!huge_class_size) 1999 huge_class_size = zs_huge_class_size(zram->mem_pool); 2000 2001 for (index = 0; index < num_pages; index++) 2002 slot_lock_init(zram, index); 2003 2004 return true; 2005 } 2006 2007 static void slot_free(struct zram *zram, u32 index) 2008 { 2009 unsigned long handle; 2010 2011 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 2012 zram->table[index].attr.ac_time = 0; 2013 #endif 2014 2015 clear_slot_flag(zram, index, ZRAM_IDLE); 2016 clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE); 2017 clear_slot_flag(zram, index, ZRAM_PP_SLOT); 2018 set_slot_comp_priority(zram, index, 0); 2019 2020 if (test_slot_flag(zram, index, ZRAM_HUGE)) { 2021 /* 2022 * Writeback completion decrements ->huge_pages but keeps 2023 * ZRAM_HUGE flag for deferred decompression path. 2024 */ 2025 if (!test_slot_flag(zram, index, ZRAM_WB)) 2026 atomic64_dec(&zram->stats.huge_pages); 2027 clear_slot_flag(zram, index, ZRAM_HUGE); 2028 } 2029 2030 if (test_slot_flag(zram, index, ZRAM_WB)) { 2031 clear_slot_flag(zram, index, ZRAM_WB); 2032 zram_release_bdev_block(zram, get_slot_handle(zram, index)); 2033 goto out; 2034 } 2035 2036 /* 2037 * No memory is allocated for same element filled pages. 2038 * Simply clear same page flag. 2039 */ 2040 if (test_slot_flag(zram, index, ZRAM_SAME)) { 2041 clear_slot_flag(zram, index, ZRAM_SAME); 2042 atomic64_dec(&zram->stats.same_pages); 2043 goto out; 2044 } 2045 2046 handle = get_slot_handle(zram, index); 2047 if (!handle) 2048 return; 2049 2050 zs_free(zram->mem_pool, handle); 2051 2052 atomic64_sub(get_slot_size(zram, index), 2053 &zram->stats.compr_data_size); 2054 out: 2055 atomic64_dec(&zram->stats.pages_stored); 2056 set_slot_handle(zram, index, 0); 2057 set_slot_size(zram, index, 0); 2058 } 2059 2060 static int read_same_filled_page(struct zram *zram, struct page *page, 2061 u32 index) 2062 { 2063 void *mem; 2064 2065 mem = kmap_local_page(page); 2066 zram_fill_page(mem, PAGE_SIZE, get_slot_handle(zram, index)); 2067 kunmap_local(mem); 2068 return 0; 2069 } 2070 2071 static int read_incompressible_page(struct zram *zram, struct page *page, 2072 u32 index) 2073 { 2074 unsigned long handle; 2075 void *src, *dst; 2076 2077 handle = get_slot_handle(zram, index); 2078 src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL); 2079 dst = kmap_local_page(page); 2080 copy_page(dst, src); 2081 kunmap_local(dst); 2082 zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src); 2083 2084 return 0; 2085 } 2086 2087 static int read_compressed_page(struct zram *zram, struct page *page, u32 index) 2088 { 2089 struct zcomp_strm *zstrm; 2090 unsigned long handle; 2091 unsigned int size; 2092 void *src, *dst; 2093 int ret, prio; 2094 2095 handle = get_slot_handle(zram, index); 2096 size = get_slot_size(zram, index); 2097 prio = get_slot_comp_priority(zram, index); 2098 2099 zstrm = zcomp_stream_get(zram->comps[prio]); 2100 src = zs_obj_read_begin(zram->mem_pool, handle, size, 2101 zstrm->local_copy); 2102 dst = kmap_local_page(page); 2103 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst); 2104 kunmap_local(dst); 2105 zs_obj_read_end(zram->mem_pool, handle, size, src); 2106 zcomp_stream_put(zstrm); 2107 2108 return ret; 2109 } 2110 2111 #if defined CONFIG_ZRAM_WRITEBACK 2112 static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index) 2113 { 2114 struct zcomp_strm *zstrm; 2115 unsigned long handle; 2116 unsigned int size; 2117 void *src; 2118 2119 handle = get_slot_handle(zram, index); 2120 size = get_slot_size(zram, index); 2121 2122 /* 2123 * We need to get stream just for ->local_copy buffer, in 2124 * case if object spans two physical pages. No decompression 2125 * takes place here, as we read raw compressed data. 2126 */ 2127 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); 2128 src = zs_obj_read_begin(zram->mem_pool, handle, size, 2129 zstrm->local_copy); 2130 memcpy_to_page(page, 0, src, size); 2131 zs_obj_read_end(zram->mem_pool, handle, size, src); 2132 zcomp_stream_put(zstrm); 2133 2134 return 0; 2135 } 2136 #endif 2137 2138 /* 2139 * Reads (decompresses if needed) a page from zspool (zsmalloc). 2140 * Corresponding ZRAM slot should be locked. 2141 */ 2142 static int read_from_zspool(struct zram *zram, struct page *page, u32 index) 2143 { 2144 if (test_slot_flag(zram, index, ZRAM_SAME) || 2145 !get_slot_handle(zram, index)) 2146 return read_same_filled_page(zram, page, index); 2147 2148 if (!test_slot_flag(zram, index, ZRAM_HUGE)) 2149 return read_compressed_page(zram, page, index); 2150 else 2151 return read_incompressible_page(zram, page, index); 2152 } 2153 2154 static int zram_read_page(struct zram *zram, struct page *page, u32 index, 2155 struct bio *parent) 2156 { 2157 int ret; 2158 2159 slot_lock(zram, index); 2160 if (!test_slot_flag(zram, index, ZRAM_WB)) { 2161 /* Slot should be locked through out the function call */ 2162 ret = read_from_zspool(zram, page, index); 2163 slot_unlock(zram, index); 2164 } else { 2165 unsigned long blk_idx = get_slot_handle(zram, index); 2166 2167 /* 2168 * The slot should be unlocked before reading from the backing 2169 * device. 2170 */ 2171 slot_unlock(zram, index); 2172 ret = read_from_bdev(zram, page, index, blk_idx, parent); 2173 } 2174 2175 /* Should NEVER happen. Return bio error if it does. */ 2176 if (WARN_ON(ret < 0)) 2177 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 2178 2179 return ret; 2180 } 2181 2182 /* 2183 * Use a temporary buffer to decompress the page, as the decompressor 2184 * always expects a full page for the output. 2185 */ 2186 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec, 2187 u32 index, int offset) 2188 { 2189 struct page *page = alloc_page(GFP_NOIO); 2190 int ret; 2191 2192 if (!page) 2193 return -ENOMEM; 2194 ret = zram_read_page(zram, page, index, NULL); 2195 if (likely(!ret)) 2196 memcpy_to_bvec(bvec, page_address(page) + offset); 2197 __free_page(page); 2198 return ret; 2199 } 2200 2201 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 2202 u32 index, int offset, struct bio *bio) 2203 { 2204 if (is_partial_io(bvec)) 2205 return zram_bvec_read_partial(zram, bvec, index, offset); 2206 return zram_read_page(zram, bvec->bv_page, index, bio); 2207 } 2208 2209 static int write_same_filled_page(struct zram *zram, unsigned long fill, 2210 u32 index) 2211 { 2212 slot_lock(zram, index); 2213 slot_free(zram, index); 2214 set_slot_flag(zram, index, ZRAM_SAME); 2215 set_slot_handle(zram, index, fill); 2216 slot_unlock(zram, index); 2217 2218 atomic64_inc(&zram->stats.same_pages); 2219 atomic64_inc(&zram->stats.pages_stored); 2220 2221 return 0; 2222 } 2223 2224 static int write_incompressible_page(struct zram *zram, struct page *page, 2225 u32 index) 2226 { 2227 unsigned long handle; 2228 void *src; 2229 2230 /* 2231 * This function is called from preemptible context so we don't need 2232 * to do optimistic and fallback to pessimistic handle allocation, 2233 * like we do for compressible pages. 2234 */ 2235 handle = zs_malloc(zram->mem_pool, PAGE_SIZE, 2236 GFP_NOIO | __GFP_NOWARN | 2237 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); 2238 if (IS_ERR_VALUE(handle)) 2239 return PTR_ERR((void *)handle); 2240 2241 if (!zram_can_store_page(zram)) { 2242 zs_free(zram->mem_pool, handle); 2243 return -ENOMEM; 2244 } 2245 2246 src = kmap_local_page(page); 2247 zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE); 2248 kunmap_local(src); 2249 2250 slot_lock(zram, index); 2251 slot_free(zram, index); 2252 set_slot_flag(zram, index, ZRAM_HUGE); 2253 set_slot_handle(zram, index, handle); 2254 set_slot_size(zram, index, PAGE_SIZE); 2255 slot_unlock(zram, index); 2256 2257 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size); 2258 atomic64_inc(&zram->stats.huge_pages); 2259 atomic64_inc(&zram->stats.huge_pages_since); 2260 atomic64_inc(&zram->stats.pages_stored); 2261 2262 return 0; 2263 } 2264 2265 static int zram_write_page(struct zram *zram, struct page *page, u32 index) 2266 { 2267 int ret = 0; 2268 unsigned long handle; 2269 unsigned int comp_len; 2270 void *mem; 2271 struct zcomp_strm *zstrm; 2272 unsigned long element; 2273 bool same_filled; 2274 2275 mem = kmap_local_page(page); 2276 same_filled = page_same_filled(mem, &element); 2277 kunmap_local(mem); 2278 if (same_filled) 2279 return write_same_filled_page(zram, element, index); 2280 2281 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); 2282 mem = kmap_local_page(page); 2283 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm, 2284 mem, &comp_len); 2285 kunmap_local(mem); 2286 2287 if (unlikely(ret)) { 2288 zcomp_stream_put(zstrm); 2289 pr_err("Compression failed! err=%d\n", ret); 2290 return ret; 2291 } 2292 2293 if (comp_len >= huge_class_size) { 2294 zcomp_stream_put(zstrm); 2295 return write_incompressible_page(zram, page, index); 2296 } 2297 2298 handle = zs_malloc(zram->mem_pool, comp_len, 2299 GFP_NOIO | __GFP_NOWARN | 2300 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); 2301 if (IS_ERR_VALUE(handle)) { 2302 zcomp_stream_put(zstrm); 2303 return PTR_ERR((void *)handle); 2304 } 2305 2306 if (!zram_can_store_page(zram)) { 2307 zcomp_stream_put(zstrm); 2308 zs_free(zram->mem_pool, handle); 2309 return -ENOMEM; 2310 } 2311 2312 zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len); 2313 zcomp_stream_put(zstrm); 2314 2315 slot_lock(zram, index); 2316 slot_free(zram, index); 2317 set_slot_handle(zram, index, handle); 2318 set_slot_size(zram, index, comp_len); 2319 slot_unlock(zram, index); 2320 2321 /* Update stats */ 2322 atomic64_inc(&zram->stats.pages_stored); 2323 atomic64_add(comp_len, &zram->stats.compr_data_size); 2324 2325 return ret; 2326 } 2327 2328 /* 2329 * This is a partial IO. Read the full page before writing the changes. 2330 */ 2331 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, 2332 u32 index, int offset, struct bio *bio) 2333 { 2334 struct page *page = alloc_page(GFP_NOIO); 2335 int ret; 2336 2337 if (!page) 2338 return -ENOMEM; 2339 2340 ret = zram_read_page(zram, page, index, NULL); 2341 if (!ret) { 2342 memcpy_from_bvec(page_address(page) + offset, bvec); 2343 ret = zram_write_page(zram, page, index); 2344 } 2345 __free_page(page); 2346 return ret; 2347 } 2348 2349 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 2350 u32 index, int offset, struct bio *bio) 2351 { 2352 if (is_partial_io(bvec)) 2353 return zram_bvec_write_partial(zram, bvec, index, offset, bio); 2354 return zram_write_page(zram, bvec->bv_page, index); 2355 } 2356 2357 #ifdef CONFIG_ZRAM_MULTI_COMP 2358 #define RECOMPRESS_IDLE (1 << 0) 2359 #define RECOMPRESS_HUGE (1 << 1) 2360 2361 static bool highest_priority_algorithm(struct zram *zram, u32 prio) 2362 { 2363 u32 p; 2364 2365 for (p = prio + 1; p < ZRAM_MAX_COMPS; p++) { 2366 if (zram->comp_algs[p]) 2367 return false; 2368 } 2369 2370 return true; 2371 } 2372 2373 static void scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio, 2374 struct zram_pp_ctl *ctl) 2375 { 2376 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 2377 unsigned long index; 2378 2379 for (index = 0; index < nr_pages; index++) { 2380 bool ok = true; 2381 2382 slot_lock(zram, index); 2383 if (!slot_allocated(zram, index)) 2384 goto next; 2385 2386 if (mode & RECOMPRESS_IDLE && 2387 !test_slot_flag(zram, index, ZRAM_IDLE)) 2388 goto next; 2389 2390 if (mode & RECOMPRESS_HUGE && 2391 !test_slot_flag(zram, index, ZRAM_HUGE)) 2392 goto next; 2393 2394 if (test_slot_flag(zram, index, ZRAM_WB) || 2395 test_slot_flag(zram, index, ZRAM_SAME) || 2396 test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 2397 goto next; 2398 2399 /* Already compressed with same or higher priority */ 2400 if (get_slot_comp_priority(zram, index) >= prio) 2401 goto next; 2402 2403 ok = place_pp_slot(zram, ctl, index); 2404 next: 2405 slot_unlock(zram, index); 2406 if (!ok) 2407 break; 2408 } 2409 } 2410 2411 /* 2412 * This function will decompress (unless it's ZRAM_HUGE) the page and then 2413 * attempt to compress it using provided compression algorithm priority 2414 * (which is potentially more effective). 2415 * 2416 * Corresponding ZRAM slot should be locked. 2417 */ 2418 static int recompress_slot(struct zram *zram, u32 index, struct page *page, 2419 u64 *num_recomp_pages, u32 threshold, u32 prio) 2420 { 2421 struct zcomp_strm *zstrm = NULL; 2422 unsigned long handle_old; 2423 unsigned long handle_new; 2424 unsigned int comp_len_old; 2425 unsigned int comp_len_new; 2426 unsigned int class_index_old; 2427 unsigned int class_index_new; 2428 void *src; 2429 int ret = 0; 2430 2431 handle_old = get_slot_handle(zram, index); 2432 if (!handle_old) 2433 return -EINVAL; 2434 2435 comp_len_old = get_slot_size(zram, index); 2436 /* 2437 * Do not recompress objects that are already "small enough". 2438 */ 2439 if (comp_len_old < threshold) 2440 return 0; 2441 2442 ret = read_from_zspool(zram, page, index); 2443 if (ret) 2444 return ret; 2445 2446 /* 2447 * We touched this entry so mark it as non-IDLE. This makes sure that 2448 * we don't preserve IDLE flag and don't incorrectly pick this entry 2449 * for different post-processing type (e.g. writeback). 2450 */ 2451 clear_slot_flag(zram, index, ZRAM_IDLE); 2452 2453 zstrm = zcomp_stream_get(zram->comps[prio]); 2454 src = kmap_local_page(page); 2455 ret = zcomp_compress(zram->comps[prio], zstrm, src, &comp_len_new); 2456 kunmap_local(src); 2457 2458 /* 2459 * Decrement the limit (if set) on pages we can recompress, even 2460 * when current recompression was unsuccessful or did not compress 2461 * the page below the threshold, because we still spent resources 2462 * on it. 2463 */ 2464 if (*num_recomp_pages) 2465 *num_recomp_pages -= 1; 2466 2467 if (ret) { 2468 zcomp_stream_put(zstrm); 2469 return ret; 2470 } 2471 2472 class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); 2473 class_index_new = zs_lookup_class_index(zram->mem_pool, comp_len_new); 2474 2475 if (class_index_new >= class_index_old || 2476 (threshold && comp_len_new >= threshold)) { 2477 zcomp_stream_put(zstrm); 2478 2479 /* 2480 * Secondary algorithms failed to re-compress the page 2481 * in a way that would save memory. 2482 * 2483 * Mark the object incompressible if the max-priority (the 2484 * last configured one) algorithm couldn't re-compress it. 2485 */ 2486 if (highest_priority_algorithm(zram, prio)) 2487 set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE); 2488 return 0; 2489 } 2490 2491 /* 2492 * We are holding per-CPU stream mutex and entry lock so better 2493 * avoid direct reclaim. Allocation error is not fatal since 2494 * we still have the old object in the mem_pool. 2495 * 2496 * XXX: technically, the node we really want here is the node that 2497 * holds the original compressed data. But that would require us to 2498 * modify zsmalloc API to return this information. For now, we will 2499 * make do with the node of the page allocated for recompression. 2500 */ 2501 handle_new = zs_malloc(zram->mem_pool, comp_len_new, 2502 GFP_NOIO | __GFP_NOWARN | 2503 __GFP_HIGHMEM | __GFP_MOVABLE, 2504 page_to_nid(page)); 2505 if (IS_ERR_VALUE(handle_new)) { 2506 zcomp_stream_put(zstrm); 2507 return PTR_ERR((void *)handle_new); 2508 } 2509 2510 zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new); 2511 zcomp_stream_put(zstrm); 2512 2513 slot_free(zram, index); 2514 set_slot_handle(zram, index, handle_new); 2515 set_slot_size(zram, index, comp_len_new); 2516 set_slot_comp_priority(zram, index, prio); 2517 2518 atomic64_add(comp_len_new, &zram->stats.compr_data_size); 2519 atomic64_inc(&zram->stats.pages_stored); 2520 2521 return 0; 2522 } 2523 2524 static ssize_t recompress_store(struct device *dev, 2525 struct device_attribute *attr, 2526 const char *buf, size_t len) 2527 { 2528 struct zram *zram = dev_to_zram(dev); 2529 char *args, *param, *val, *algo = NULL; 2530 u64 num_recomp_pages = ULLONG_MAX; 2531 struct zram_pp_ctl *ctl = NULL; 2532 s32 prio = ZRAM_SECONDARY_COMP; 2533 u32 mode = 0, threshold = 0; 2534 struct zram_pp_slot *pps; 2535 struct page *page = NULL; 2536 bool prio_param = false; 2537 ssize_t ret; 2538 2539 args = skip_spaces(buf); 2540 while (*args) { 2541 args = next_arg(args, ¶m, &val); 2542 2543 if (!val || !*val) 2544 return -EINVAL; 2545 2546 if (!strcmp(param, "type")) { 2547 if (!strcmp(val, "idle")) 2548 mode = RECOMPRESS_IDLE; 2549 if (!strcmp(val, "huge")) 2550 mode = RECOMPRESS_HUGE; 2551 if (!strcmp(val, "huge_idle")) 2552 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE; 2553 if (!mode) 2554 return -EINVAL; 2555 continue; 2556 } 2557 2558 if (!strcmp(param, "max_pages")) { 2559 /* 2560 * Limit the number of entries (pages) we attempt to 2561 * recompress. 2562 */ 2563 ret = kstrtoull(val, 10, &num_recomp_pages); 2564 if (ret) 2565 return ret; 2566 continue; 2567 } 2568 2569 if (!strcmp(param, "threshold")) { 2570 /* 2571 * We will re-compress only idle objects equal or 2572 * greater in size than watermark. 2573 */ 2574 ret = kstrtouint(val, 10, &threshold); 2575 if (ret) 2576 return ret; 2577 continue; 2578 } 2579 2580 if (!strcmp(param, "algo")) { 2581 algo = val; 2582 continue; 2583 } 2584 2585 if (!strcmp(param, "priority")) { 2586 prio_param = true; 2587 ret = kstrtoint(val, 10, &prio); 2588 if (ret) 2589 return ret; 2590 continue; 2591 } 2592 } 2593 2594 if (threshold >= huge_class_size) 2595 return -EINVAL; 2596 2597 guard(rwsem_write)(&zram->dev_lock); 2598 if (!init_done(zram)) 2599 return -EINVAL; 2600 2601 if (prio_param) { 2602 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) 2603 return -EINVAL; 2604 } 2605 2606 if (algo && prio_param) { 2607 ret = validate_algo_priority(zram, algo, prio); 2608 if (ret) 2609 return ret; 2610 } 2611 2612 if (algo && !prio_param) { 2613 prio = lookup_algo_priority(zram, algo, ZRAM_SECONDARY_COMP); 2614 if (prio < 0) 2615 return -EINVAL; 2616 } 2617 2618 if (!zram->comps[prio]) 2619 return -EINVAL; 2620 2621 page = alloc_page(GFP_KERNEL); 2622 if (!page) { 2623 ret = -ENOMEM; 2624 goto out; 2625 } 2626 2627 ctl = init_pp_ctl(); 2628 if (!ctl) { 2629 ret = -ENOMEM; 2630 goto out; 2631 } 2632 2633 scan_slots_for_recompress(zram, mode, prio, ctl); 2634 2635 ret = len; 2636 while ((pps = select_pp_slot(ctl))) { 2637 int err = 0; 2638 2639 if (!num_recomp_pages) 2640 break; 2641 2642 slot_lock(zram, pps->index); 2643 if (!test_slot_flag(zram, pps->index, ZRAM_PP_SLOT)) 2644 goto next; 2645 2646 err = recompress_slot(zram, pps->index, page, 2647 &num_recomp_pages, threshold, prio); 2648 next: 2649 slot_unlock(zram, pps->index); 2650 release_pp_slot(zram, pps); 2651 2652 if (err) { 2653 ret = err; 2654 break; 2655 } 2656 2657 cond_resched(); 2658 } 2659 2660 out: 2661 if (page) 2662 __free_page(page); 2663 release_pp_ctl(zram, ctl); 2664 return ret; 2665 } 2666 #endif 2667 2668 static void zram_bio_discard(struct zram *zram, struct bio *bio) 2669 { 2670 size_t n = bio->bi_iter.bi_size; 2671 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2672 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2673 SECTOR_SHIFT; 2674 2675 /* 2676 * zram manages data in physical block size units. Because logical block 2677 * size isn't identical with physical block size on some arch, we 2678 * could get a discard request pointing to a specific offset within a 2679 * certain physical block. Although we can handle this request by 2680 * reading that physiclal block and decompressing and partially zeroing 2681 * and re-compressing and then re-storing it, this isn't reasonable 2682 * because our intent with a discard request is to save memory. So 2683 * skipping this logical block is appropriate here. 2684 */ 2685 if (offset) { 2686 if (n <= (PAGE_SIZE - offset)) 2687 goto end_bio; 2688 2689 n -= (PAGE_SIZE - offset); 2690 index++; 2691 } 2692 2693 while (n >= PAGE_SIZE) { 2694 slot_lock(zram, index); 2695 slot_free(zram, index); 2696 slot_unlock(zram, index); 2697 atomic64_inc(&zram->stats.notify_free); 2698 index++; 2699 n -= PAGE_SIZE; 2700 } 2701 2702 end_bio: 2703 bio_endio(bio); 2704 } 2705 2706 static void zram_bio_read(struct zram *zram, struct bio *bio) 2707 { 2708 unsigned long start_time = bio_start_io_acct(bio); 2709 struct bvec_iter iter = bio->bi_iter; 2710 2711 do { 2712 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2713 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2714 SECTOR_SHIFT; 2715 struct bio_vec bv = bio_iter_iovec(bio, iter); 2716 2717 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2718 2719 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { 2720 atomic64_inc(&zram->stats.failed_reads); 2721 bio->bi_status = BLK_STS_IOERR; 2722 break; 2723 } 2724 flush_dcache_page(bv.bv_page); 2725 2726 slot_lock(zram, index); 2727 mark_slot_accessed(zram, index); 2728 slot_unlock(zram, index); 2729 2730 bio_advance_iter_single(bio, &iter, bv.bv_len); 2731 } while (iter.bi_size); 2732 2733 bio_end_io_acct(bio, start_time); 2734 bio_endio(bio); 2735 } 2736 2737 static void zram_bio_write(struct zram *zram, struct bio *bio) 2738 { 2739 unsigned long start_time = bio_start_io_acct(bio); 2740 struct bvec_iter iter = bio->bi_iter; 2741 2742 do { 2743 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 2744 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 2745 SECTOR_SHIFT; 2746 struct bio_vec bv = bio_iter_iovec(bio, iter); 2747 2748 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 2749 2750 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { 2751 atomic64_inc(&zram->stats.failed_writes); 2752 bio->bi_status = BLK_STS_IOERR; 2753 break; 2754 } 2755 2756 slot_lock(zram, index); 2757 mark_slot_accessed(zram, index); 2758 slot_unlock(zram, index); 2759 2760 bio_advance_iter_single(bio, &iter, bv.bv_len); 2761 } while (iter.bi_size); 2762 2763 bio_end_io_acct(bio, start_time); 2764 bio_endio(bio); 2765 } 2766 2767 /* 2768 * Handler function for all zram I/O requests. 2769 */ 2770 static void zram_submit_bio(struct bio *bio) 2771 { 2772 struct zram *zram = bio->bi_bdev->bd_disk->private_data; 2773 2774 switch (bio_op(bio)) { 2775 case REQ_OP_READ: 2776 zram_bio_read(zram, bio); 2777 break; 2778 case REQ_OP_WRITE: 2779 zram_bio_write(zram, bio); 2780 break; 2781 case REQ_OP_DISCARD: 2782 case REQ_OP_WRITE_ZEROES: 2783 zram_bio_discard(zram, bio); 2784 break; 2785 default: 2786 WARN_ON_ONCE(1); 2787 bio_endio(bio); 2788 } 2789 } 2790 2791 static void zram_slot_free_notify(struct block_device *bdev, 2792 unsigned long index) 2793 { 2794 struct zram *zram; 2795 2796 zram = bdev->bd_disk->private_data; 2797 2798 atomic64_inc(&zram->stats.notify_free); 2799 if (!slot_trylock(zram, index)) { 2800 atomic64_inc(&zram->stats.miss_free); 2801 return; 2802 } 2803 2804 slot_free(zram, index); 2805 slot_unlock(zram, index); 2806 } 2807 2808 static void zram_comp_params_reset(struct zram *zram) 2809 { 2810 u32 prio; 2811 2812 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2813 comp_params_reset(zram, prio); 2814 } 2815 } 2816 2817 static void zram_destroy_comps(struct zram *zram) 2818 { 2819 u32 prio; 2820 2821 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2822 struct zcomp *comp = zram->comps[prio]; 2823 2824 zram->comps[prio] = NULL; 2825 if (!comp) 2826 continue; 2827 zcomp_destroy(comp); 2828 } 2829 2830 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) 2831 zram->comp_algs[prio] = NULL; 2832 2833 zram_comp_params_reset(zram); 2834 } 2835 2836 static void zram_reset_device(struct zram *zram) 2837 { 2838 guard(rwsem_write)(&zram->dev_lock); 2839 2840 zram->limit_pages = 0; 2841 2842 set_capacity_and_notify(zram->disk, 0); 2843 part_stat_set_all(zram->disk->part0, 0); 2844 2845 /* I/O operation under all of CPU are done so let's free */ 2846 zram_meta_free(zram, zram->disksize); 2847 zram->disksize = 0; 2848 zram_destroy_comps(zram); 2849 memset(&zram->stats, 0, sizeof(zram->stats)); 2850 reset_bdev(zram); 2851 2852 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 2853 } 2854 2855 static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, 2856 const char *buf, size_t len) 2857 { 2858 u64 disksize; 2859 struct zcomp *comp; 2860 struct zram *zram = dev_to_zram(dev); 2861 int err; 2862 u32 prio; 2863 2864 disksize = memparse(buf, NULL); 2865 if (!disksize) 2866 return -EINVAL; 2867 2868 guard(rwsem_write)(&zram->dev_lock); 2869 if (init_done(zram)) { 2870 pr_info("Cannot change disksize for initialized device\n"); 2871 return -EBUSY; 2872 } 2873 2874 disksize = PAGE_ALIGN(disksize); 2875 if (!zram_meta_alloc(zram, disksize)) 2876 return -ENOMEM; 2877 2878 for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 2879 if (!zram->comp_algs[prio]) 2880 continue; 2881 2882 comp = zcomp_create(zram->comp_algs[prio], 2883 &zram->params[prio]); 2884 if (IS_ERR(comp)) { 2885 pr_err("Cannot initialise %s compressing backend\n", 2886 zram->comp_algs[prio]); 2887 err = PTR_ERR(comp); 2888 goto out_free_comps; 2889 } 2890 2891 zram->comps[prio] = comp; 2892 } 2893 zram->disksize = disksize; 2894 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); 2895 2896 return len; 2897 2898 out_free_comps: 2899 zram_destroy_comps(zram); 2900 zram_meta_free(zram, disksize); 2901 return err; 2902 } 2903 2904 static ssize_t reset_store(struct device *dev, 2905 struct device_attribute *attr, const char *buf, size_t len) 2906 { 2907 int ret; 2908 unsigned short do_reset; 2909 struct zram *zram; 2910 struct gendisk *disk; 2911 2912 ret = kstrtou16(buf, 10, &do_reset); 2913 if (ret) 2914 return ret; 2915 2916 if (!do_reset) 2917 return -EINVAL; 2918 2919 zram = dev_to_zram(dev); 2920 disk = zram->disk; 2921 2922 mutex_lock(&disk->open_mutex); 2923 /* Do not reset an active device or claimed device */ 2924 if (disk_openers(disk) || zram->claim) { 2925 mutex_unlock(&disk->open_mutex); 2926 return -EBUSY; 2927 } 2928 2929 /* From now on, anyone can't open /dev/zram[0-9] */ 2930 zram->claim = true; 2931 mutex_unlock(&disk->open_mutex); 2932 2933 /* Make sure all the pending I/O are finished */ 2934 sync_blockdev(disk->part0); 2935 zram_reset_device(zram); 2936 2937 mutex_lock(&disk->open_mutex); 2938 zram->claim = false; 2939 mutex_unlock(&disk->open_mutex); 2940 2941 return len; 2942 } 2943 2944 static int zram_open(struct gendisk *disk, blk_mode_t mode) 2945 { 2946 struct zram *zram = disk->private_data; 2947 2948 WARN_ON(!mutex_is_locked(&disk->open_mutex)); 2949 2950 /* zram was claimed to reset so open request fails */ 2951 if (zram->claim) 2952 return -EBUSY; 2953 return 0; 2954 } 2955 2956 static const struct block_device_operations zram_devops = { 2957 .open = zram_open, 2958 .submit_bio = zram_submit_bio, 2959 .swap_slot_free_notify = zram_slot_free_notify, 2960 .owner = THIS_MODULE 2961 }; 2962 2963 static DEVICE_ATTR_RO(io_stat); 2964 static DEVICE_ATTR_RO(mm_stat); 2965 static DEVICE_ATTR_RO(debug_stat); 2966 static DEVICE_ATTR_WO(compact); 2967 static DEVICE_ATTR_RW(disksize); 2968 static DEVICE_ATTR_RO(initstate); 2969 static DEVICE_ATTR_WO(reset); 2970 static DEVICE_ATTR_WO(mem_limit); 2971 static DEVICE_ATTR_WO(mem_used_max); 2972 static DEVICE_ATTR_WO(idle); 2973 static DEVICE_ATTR_RW(comp_algorithm); 2974 #ifdef CONFIG_ZRAM_WRITEBACK 2975 static DEVICE_ATTR_RO(bd_stat); 2976 static DEVICE_ATTR_RW(backing_dev); 2977 static DEVICE_ATTR_WO(writeback); 2978 static DEVICE_ATTR_RW(writeback_limit); 2979 static DEVICE_ATTR_RW(writeback_limit_enable); 2980 static DEVICE_ATTR_RW(writeback_batch_size); 2981 static DEVICE_ATTR_RW(compressed_writeback); 2982 #endif 2983 #ifdef CONFIG_ZRAM_MULTI_COMP 2984 static DEVICE_ATTR_RW(recomp_algorithm); 2985 static DEVICE_ATTR_WO(recompress); 2986 #endif 2987 static DEVICE_ATTR_WO(algorithm_params); 2988 2989 static struct attribute *zram_disk_attrs[] = { 2990 &dev_attr_disksize.attr, 2991 &dev_attr_initstate.attr, 2992 &dev_attr_reset.attr, 2993 &dev_attr_compact.attr, 2994 &dev_attr_mem_limit.attr, 2995 &dev_attr_mem_used_max.attr, 2996 &dev_attr_idle.attr, 2997 &dev_attr_comp_algorithm.attr, 2998 #ifdef CONFIG_ZRAM_WRITEBACK 2999 &dev_attr_bd_stat.attr, 3000 &dev_attr_backing_dev.attr, 3001 &dev_attr_writeback.attr, 3002 &dev_attr_writeback_limit.attr, 3003 &dev_attr_writeback_limit_enable.attr, 3004 &dev_attr_writeback_batch_size.attr, 3005 &dev_attr_compressed_writeback.attr, 3006 #endif 3007 &dev_attr_io_stat.attr, 3008 &dev_attr_mm_stat.attr, 3009 &dev_attr_debug_stat.attr, 3010 #ifdef CONFIG_ZRAM_MULTI_COMP 3011 &dev_attr_recomp_algorithm.attr, 3012 &dev_attr_recompress.attr, 3013 #endif 3014 &dev_attr_algorithm_params.attr, 3015 NULL, 3016 }; 3017 3018 ATTRIBUTE_GROUPS(zram_disk); 3019 3020 /* 3021 * Allocate and initialize new zram device. the function returns 3022 * '>= 0' device_id upon success, and negative value otherwise. 3023 */ 3024 static int zram_add(void) 3025 { 3026 struct queue_limits lim = { 3027 .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, 3028 /* 3029 * To ensure that we always get PAGE_SIZE aligned and 3030 * n*PAGE_SIZED sized I/O requests. 3031 */ 3032 .physical_block_size = PAGE_SIZE, 3033 .io_min = PAGE_SIZE, 3034 .io_opt = PAGE_SIZE, 3035 .max_hw_discard_sectors = UINT_MAX, 3036 /* 3037 * zram_bio_discard() will clear all logical blocks if logical 3038 * block size is identical with physical block size(PAGE_SIZE). 3039 * But if it is different, we will skip discarding some parts of 3040 * logical blocks in the part of the request range which isn't 3041 * aligned to physical block size. So we can't ensure that all 3042 * discarded logical blocks are zeroed. 3043 */ 3044 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE 3045 .max_write_zeroes_sectors = UINT_MAX, 3046 #endif 3047 .features = BLK_FEAT_STABLE_WRITES | 3048 BLK_FEAT_SYNCHRONOUS, 3049 }; 3050 struct zram *zram; 3051 int ret, device_id; 3052 3053 zram = kzalloc_obj(struct zram); 3054 if (!zram) 3055 return -ENOMEM; 3056 3057 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 3058 if (ret < 0) 3059 goto out_free_dev; 3060 device_id = ret; 3061 3062 init_rwsem(&zram->dev_lock); 3063 #ifdef CONFIG_ZRAM_WRITEBACK 3064 zram->wb_batch_size = 32; 3065 zram->compressed_wb = false; 3066 #endif 3067 3068 /* gendisk structure */ 3069 zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); 3070 if (IS_ERR(zram->disk)) { 3071 pr_err("Error allocating disk structure for device %d\n", 3072 device_id); 3073 ret = PTR_ERR(zram->disk); 3074 goto out_free_idr; 3075 } 3076 3077 zram->disk->major = zram_major; 3078 zram->disk->first_minor = device_id; 3079 zram->disk->minors = 1; 3080 zram->disk->flags |= GENHD_FL_NO_PART; 3081 zram->disk->fops = &zram_devops; 3082 zram->disk->private_data = zram; 3083 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 3084 zram_comp_params_reset(zram); 3085 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); 3086 3087 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */ 3088 set_capacity(zram->disk, 0); 3089 ret = device_add_disk(NULL, zram->disk, zram_disk_groups); 3090 if (ret) 3091 goto out_cleanup_disk; 3092 3093 zram_debugfs_register(zram); 3094 pr_info("Added device: %s\n", zram->disk->disk_name); 3095 return device_id; 3096 3097 out_cleanup_disk: 3098 put_disk(zram->disk); 3099 out_free_idr: 3100 idr_remove(&zram_index_idr, device_id); 3101 out_free_dev: 3102 kfree(zram); 3103 return ret; 3104 } 3105 3106 static int zram_remove(struct zram *zram) 3107 { 3108 bool claimed; 3109 3110 mutex_lock(&zram->disk->open_mutex); 3111 if (disk_openers(zram->disk)) { 3112 mutex_unlock(&zram->disk->open_mutex); 3113 return -EBUSY; 3114 } 3115 3116 claimed = zram->claim; 3117 if (!claimed) 3118 zram->claim = true; 3119 mutex_unlock(&zram->disk->open_mutex); 3120 3121 zram_debugfs_unregister(zram); 3122 3123 if (claimed) { 3124 /* 3125 * If we were claimed by reset_store(), del_gendisk() will 3126 * wait until reset_store() is done, so nothing need to do. 3127 */ 3128 ; 3129 } else { 3130 /* Make sure all the pending I/O are finished */ 3131 sync_blockdev(zram->disk->part0); 3132 zram_reset_device(zram); 3133 } 3134 3135 pr_info("Removed device: %s\n", zram->disk->disk_name); 3136 3137 del_gendisk(zram->disk); 3138 3139 /* del_gendisk drains pending reset_store */ 3140 WARN_ON_ONCE(claimed && zram->claim); 3141 3142 /* 3143 * disksize_store() may be called in between zram_reset_device() 3144 * and del_gendisk(), so run the last reset to avoid leaking 3145 * anything allocated with disksize_store() 3146 */ 3147 zram_reset_device(zram); 3148 3149 put_disk(zram->disk); 3150 kfree(zram); 3151 return 0; 3152 } 3153 3154 /* zram-control sysfs attributes */ 3155 3156 /* 3157 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a 3158 * sense that reading from this file does alter the state of your system -- it 3159 * creates a new un-initialized zram device and returns back this device's 3160 * device_id (or an error code if it fails to create a new device). 3161 */ 3162 static ssize_t hot_add_show(const struct class *class, 3163 const struct class_attribute *attr, 3164 char *buf) 3165 { 3166 int ret; 3167 3168 mutex_lock(&zram_index_mutex); 3169 ret = zram_add(); 3170 mutex_unlock(&zram_index_mutex); 3171 3172 if (ret < 0) 3173 return ret; 3174 return sysfs_emit(buf, "%d\n", ret); 3175 } 3176 /* This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used */ 3177 static struct class_attribute class_attr_hot_add = 3178 __ATTR(hot_add, 0400, hot_add_show, NULL); 3179 3180 static ssize_t hot_remove_store(const struct class *class, 3181 const struct class_attribute *attr, 3182 const char *buf, 3183 size_t count) 3184 { 3185 struct zram *zram; 3186 int ret, dev_id; 3187 3188 /* dev_id is gendisk->first_minor, which is `int' */ 3189 ret = kstrtoint(buf, 10, &dev_id); 3190 if (ret) 3191 return ret; 3192 if (dev_id < 0) 3193 return -EINVAL; 3194 3195 mutex_lock(&zram_index_mutex); 3196 3197 zram = idr_find(&zram_index_idr, dev_id); 3198 if (zram) { 3199 ret = zram_remove(zram); 3200 if (!ret) 3201 idr_remove(&zram_index_idr, dev_id); 3202 } else { 3203 ret = -ENODEV; 3204 } 3205 3206 mutex_unlock(&zram_index_mutex); 3207 return ret ? ret : count; 3208 } 3209 static CLASS_ATTR_WO(hot_remove); 3210 3211 static struct attribute *zram_control_class_attrs[] = { 3212 &class_attr_hot_add.attr, 3213 &class_attr_hot_remove.attr, 3214 NULL, 3215 }; 3216 ATTRIBUTE_GROUPS(zram_control_class); 3217 3218 static struct class zram_control_class = { 3219 .name = "zram-control", 3220 .class_groups = zram_control_class_groups, 3221 }; 3222 3223 static int zram_remove_cb(int id, void *ptr, void *data) 3224 { 3225 WARN_ON_ONCE(zram_remove(ptr)); 3226 return 0; 3227 } 3228 3229 static void destroy_devices(void) 3230 { 3231 class_unregister(&zram_control_class); 3232 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 3233 zram_debugfs_destroy(); 3234 idr_destroy(&zram_index_idr); 3235 unregister_blkdev(zram_major, "zram"); 3236 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3237 } 3238 3239 static int __init zram_init(void) 3240 { 3241 struct zram_table_entry zram_te; 3242 int ret; 3243 3244 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.attr.flags) * 8); 3245 3246 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 3247 zcomp_cpu_up_prepare, zcomp_cpu_dead); 3248 if (ret < 0) 3249 return ret; 3250 3251 ret = class_register(&zram_control_class); 3252 if (ret) { 3253 pr_err("Unable to register zram-control class\n"); 3254 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3255 return ret; 3256 } 3257 3258 zram_debugfs_create(); 3259 zram_major = register_blkdev(0, "zram"); 3260 if (zram_major <= 0) { 3261 pr_err("Unable to get major number\n"); 3262 class_unregister(&zram_control_class); 3263 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 3264 return -EBUSY; 3265 } 3266 3267 while (num_devices != 0) { 3268 mutex_lock(&zram_index_mutex); 3269 ret = zram_add(); 3270 mutex_unlock(&zram_index_mutex); 3271 if (ret < 0) 3272 goto out_error; 3273 num_devices--; 3274 } 3275 3276 return 0; 3277 3278 out_error: 3279 destroy_devices(); 3280 return ret; 3281 } 3282 3283 static void __exit zram_exit(void) 3284 { 3285 destroy_devices(); 3286 } 3287 3288 module_init(zram_init); 3289 module_exit(zram_exit); 3290 3291 module_param(num_devices, uint, 0); 3292 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 3293 3294 MODULE_LICENSE("Dual BSD/GPL"); 3295 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 3296 MODULE_DESCRIPTION("Compressed RAM Block Device"); 3297