1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define KMSG_COMPONENT "zram" 16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 17 18 #include <linux/module.h> 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/bitops.h> 22 #include <linux/blkdev.h> 23 #include <linux/buffer_head.h> 24 #include <linux/device.h> 25 #include <linux/highmem.h> 26 #include <linux/slab.h> 27 #include <linux/backing-dev.h> 28 #include <linux/string.h> 29 #include <linux/vmalloc.h> 30 #include <linux/err.h> 31 #include <linux/idr.h> 32 #include <linux/sysfs.h> 33 #include <linux/debugfs.h> 34 #include <linux/cpuhotplug.h> 35 #include <linux/part_stat.h> 36 #include <linux/kernel_read_file.h> 37 38 #include "zram_drv.h" 39 40 static DEFINE_IDR(zram_index_idr); 41 /* idr index must be protected */ 42 static DEFINE_MUTEX(zram_index_mutex); 43 44 static int zram_major; 45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; 46 47 /* Module params (documentation at end) */ 48 static unsigned int num_devices = 1; 49 /* 50 * Pages that compress to sizes equals or greater than this are stored 51 * uncompressed in memory. 52 */ 53 static size_t huge_class_size; 54 55 static const struct block_device_operations zram_devops; 56 57 static void zram_free_page(struct zram *zram, size_t index); 58 static int zram_read_from_zspool(struct zram *zram, struct page *page, 59 u32 index); 60 61 static int zram_slot_trylock(struct zram *zram, u32 index) 62 { 63 return spin_trylock(&zram->table[index].lock); 64 } 65 66 static void zram_slot_lock(struct zram *zram, u32 index) 67 { 68 spin_lock(&zram->table[index].lock); 69 } 70 71 static void zram_slot_unlock(struct zram *zram, u32 index) 72 { 73 spin_unlock(&zram->table[index].lock); 74 } 75 76 static inline bool init_done(struct zram *zram) 77 { 78 return zram->disksize; 79 } 80 81 static inline struct zram *dev_to_zram(struct device *dev) 82 { 83 return (struct zram *)dev_to_disk(dev)->private_data; 84 } 85 86 static unsigned long zram_get_handle(struct zram *zram, u32 index) 87 { 88 return zram->table[index].handle; 89 } 90 91 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) 92 { 93 zram->table[index].handle = handle; 94 } 95 96 /* flag operations require table entry bit_spin_lock() being held */ 97 static bool zram_test_flag(struct zram *zram, u32 index, 98 enum zram_pageflags flag) 99 { 100 return zram->table[index].flags & BIT(flag); 101 } 102 103 static void zram_set_flag(struct zram *zram, u32 index, 104 enum zram_pageflags flag) 105 { 106 zram->table[index].flags |= BIT(flag); 107 } 108 109 static void zram_clear_flag(struct zram *zram, u32 index, 110 enum zram_pageflags flag) 111 { 112 zram->table[index].flags &= ~BIT(flag); 113 } 114 115 static size_t zram_get_obj_size(struct zram *zram, u32 index) 116 { 117 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); 118 } 119 120 static void zram_set_obj_size(struct zram *zram, 121 u32 index, size_t size) 122 { 123 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; 124 125 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; 126 } 127 128 static inline bool zram_allocated(struct zram *zram, u32 
index) 129 { 130 return zram_get_obj_size(zram, index) || 131 zram_test_flag(zram, index, ZRAM_SAME) || 132 zram_test_flag(zram, index, ZRAM_WB); 133 } 134 135 static inline void update_used_max(struct zram *zram, const unsigned long pages) 136 { 137 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); 138 139 do { 140 if (cur_max >= pages) 141 return; 142 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, 143 &cur_max, pages)); 144 } 145 146 static bool zram_can_store_page(struct zram *zram) 147 { 148 unsigned long alloced_pages; 149 150 alloced_pages = zs_get_total_pages(zram->mem_pool); 151 update_used_max(zram, alloced_pages); 152 153 return !zram->limit_pages || alloced_pages <= zram->limit_pages; 154 } 155 156 #if PAGE_SIZE != 4096 157 static inline bool is_partial_io(struct bio_vec *bvec) 158 { 159 return bvec->bv_len != PAGE_SIZE; 160 } 161 #define ZRAM_PARTIAL_IO 1 162 #else 163 static inline bool is_partial_io(struct bio_vec *bvec) 164 { 165 return false; 166 } 167 #endif 168 169 static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio) 170 { 171 prio &= ZRAM_COMP_PRIORITY_MASK; 172 /* 173 * Clear previous priority value first, in case if we recompress 174 * further an already recompressed page 175 */ 176 zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK << 177 ZRAM_COMP_PRIORITY_BIT1); 178 zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); 179 } 180 181 static inline u32 zram_get_priority(struct zram *zram, u32 index) 182 { 183 u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1; 184 185 return prio & ZRAM_COMP_PRIORITY_MASK; 186 } 187 188 static void zram_accessed(struct zram *zram, u32 index) 189 { 190 zram_clear_flag(zram, index, ZRAM_IDLE); 191 zram_clear_flag(zram, index, ZRAM_PP_SLOT); 192 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 193 zram->table[index].ac_time = ktime_get_boottime(); 194 #endif 195 } 196 197 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP 198 struct zram_pp_slot { 199 unsigned long index; 200 struct list_head entry; 201 }; 202 203 /* 204 * A post-processing bucket is, essentially, a size class, this defines 205 * the range (in bytes) of pp-slots sizes in particular bucket. 
206 */ 207 #define PP_BUCKET_SIZE_RANGE 64 208 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1) 209 210 struct zram_pp_ctl { 211 struct list_head pp_buckets[NUM_PP_BUCKETS]; 212 }; 213 214 static struct zram_pp_ctl *init_pp_ctl(void) 215 { 216 struct zram_pp_ctl *ctl; 217 u32 idx; 218 219 ctl = kmalloc(sizeof(*ctl), GFP_KERNEL); 220 if (!ctl) 221 return NULL; 222 223 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) 224 INIT_LIST_HEAD(&ctl->pp_buckets[idx]); 225 return ctl; 226 } 227 228 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps) 229 { 230 list_del_init(&pps->entry); 231 232 zram_slot_lock(zram, pps->index); 233 zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT); 234 zram_slot_unlock(zram, pps->index); 235 236 kfree(pps); 237 } 238 239 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl) 240 { 241 u32 idx; 242 243 if (!ctl) 244 return; 245 246 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) { 247 while (!list_empty(&ctl->pp_buckets[idx])) { 248 struct zram_pp_slot *pps; 249 250 pps = list_first_entry(&ctl->pp_buckets[idx], 251 struct zram_pp_slot, 252 entry); 253 release_pp_slot(zram, pps); 254 } 255 } 256 257 kfree(ctl); 258 } 259 260 static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl, 261 struct zram_pp_slot *pps) 262 { 263 u32 idx; 264 265 idx = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; 266 list_add(&pps->entry, &ctl->pp_buckets[idx]); 267 268 zram_set_flag(zram, pps->index, ZRAM_PP_SLOT); 269 } 270 271 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl) 272 { 273 struct zram_pp_slot *pps = NULL; 274 s32 idx = NUM_PP_BUCKETS - 1; 275 276 /* The higher the bucket id the more optimal slot post-processing is */ 277 while (idx >= 0) { 278 pps = list_first_entry_or_null(&ctl->pp_buckets[idx], 279 struct zram_pp_slot, 280 entry); 281 if (pps) 282 break; 283 284 idx--; 285 } 286 return pps; 287 } 288 #endif 289 290 static inline void zram_fill_page(void *ptr, unsigned long len, 291 unsigned long value) 292 { 293 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 294 memset_l(ptr, value, len / sizeof(unsigned long)); 295 } 296 297 static bool page_same_filled(void *ptr, unsigned long *element) 298 { 299 unsigned long *page; 300 unsigned long val; 301 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 302 303 page = (unsigned long *)ptr; 304 val = page[0]; 305 306 if (val != page[last_pos]) 307 return false; 308 309 for (pos = 1; pos < last_pos; pos++) { 310 if (val != page[pos]) 311 return false; 312 } 313 314 *element = val; 315 316 return true; 317 } 318 319 static ssize_t initstate_show(struct device *dev, 320 struct device_attribute *attr, char *buf) 321 { 322 u32 val; 323 struct zram *zram = dev_to_zram(dev); 324 325 down_read(&zram->init_lock); 326 val = init_done(zram); 327 up_read(&zram->init_lock); 328 329 return scnprintf(buf, PAGE_SIZE, "%u\n", val); 330 } 331 332 static ssize_t disksize_show(struct device *dev, 333 struct device_attribute *attr, char *buf) 334 { 335 struct zram *zram = dev_to_zram(dev); 336 337 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); 338 } 339 340 static ssize_t mem_limit_store(struct device *dev, 341 struct device_attribute *attr, const char *buf, size_t len) 342 { 343 u64 limit; 344 char *tmp; 345 struct zram *zram = dev_to_zram(dev); 346 347 limit = memparse(buf, &tmp); 348 if (buf == tmp) /* no chars parsed, invalid input */ 349 return -EINVAL; 350 351 down_write(&zram->init_lock); 352 zram->limit_pages = PAGE_ALIGN(limit) >> 
PAGE_SHIFT; 353 up_write(&zram->init_lock); 354 355 return len; 356 } 357 358 static ssize_t mem_used_max_store(struct device *dev, 359 struct device_attribute *attr, const char *buf, size_t len) 360 { 361 int err; 362 unsigned long val; 363 struct zram *zram = dev_to_zram(dev); 364 365 err = kstrtoul(buf, 10, &val); 366 if (err || val != 0) 367 return -EINVAL; 368 369 down_read(&zram->init_lock); 370 if (init_done(zram)) { 371 atomic_long_set(&zram->stats.max_used_pages, 372 zs_get_total_pages(zram->mem_pool)); 373 } 374 up_read(&zram->init_lock); 375 376 return len; 377 } 378 379 /* 380 * Mark all pages which are older than or equal to cutoff as IDLE. 381 * Callers should hold the zram init lock in read mode 382 */ 383 static void mark_idle(struct zram *zram, ktime_t cutoff) 384 { 385 int is_idle = 1; 386 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 387 int index; 388 389 for (index = 0; index < nr_pages; index++) { 390 /* 391 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no 392 * post-processing (recompress, writeback) happens to the 393 * ZRAM_SAME slot. 394 * 395 * And ZRAM_WB slots simply cannot be ZRAM_IDLE. 396 */ 397 zram_slot_lock(zram, index); 398 if (!zram_allocated(zram, index) || 399 zram_test_flag(zram, index, ZRAM_WB) || 400 zram_test_flag(zram, index, ZRAM_SAME)) { 401 zram_slot_unlock(zram, index); 402 continue; 403 } 404 405 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 406 is_idle = !cutoff || 407 ktime_after(cutoff, zram->table[index].ac_time); 408 #endif 409 if (is_idle) 410 zram_set_flag(zram, index, ZRAM_IDLE); 411 else 412 zram_clear_flag(zram, index, ZRAM_IDLE); 413 zram_slot_unlock(zram, index); 414 } 415 } 416 417 static ssize_t idle_store(struct device *dev, 418 struct device_attribute *attr, const char *buf, size_t len) 419 { 420 struct zram *zram = dev_to_zram(dev); 421 ktime_t cutoff_time = 0; 422 ssize_t rv = -EINVAL; 423 424 if (!sysfs_streq(buf, "all")) { 425 /* 426 * If it did not parse as 'all' try to treat it as an integer 427 * when we have memory tracking enabled. 428 */ 429 u64 age_sec; 430 431 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec)) 432 cutoff_time = ktime_sub(ktime_get_boottime(), 433 ns_to_ktime(age_sec * NSEC_PER_SEC)); 434 else 435 goto out; 436 } 437 438 down_read(&zram->init_lock); 439 if (!init_done(zram)) 440 goto out_unlock; 441 442 /* 443 * A cutoff_time of 0 marks everything as idle, this is the 444 * "all" behavior. 
445 */ 446 mark_idle(zram, cutoff_time); 447 rv = len; 448 449 out_unlock: 450 up_read(&zram->init_lock); 451 out: 452 return rv; 453 } 454 455 #ifdef CONFIG_ZRAM_WRITEBACK 456 static ssize_t writeback_limit_enable_store(struct device *dev, 457 struct device_attribute *attr, const char *buf, size_t len) 458 { 459 struct zram *zram = dev_to_zram(dev); 460 u64 val; 461 ssize_t ret = -EINVAL; 462 463 if (kstrtoull(buf, 10, &val)) 464 return ret; 465 466 down_read(&zram->init_lock); 467 spin_lock(&zram->wb_limit_lock); 468 zram->wb_limit_enable = val; 469 spin_unlock(&zram->wb_limit_lock); 470 up_read(&zram->init_lock); 471 ret = len; 472 473 return ret; 474 } 475 476 static ssize_t writeback_limit_enable_show(struct device *dev, 477 struct device_attribute *attr, char *buf) 478 { 479 bool val; 480 struct zram *zram = dev_to_zram(dev); 481 482 down_read(&zram->init_lock); 483 spin_lock(&zram->wb_limit_lock); 484 val = zram->wb_limit_enable; 485 spin_unlock(&zram->wb_limit_lock); 486 up_read(&zram->init_lock); 487 488 return scnprintf(buf, PAGE_SIZE, "%d\n", val); 489 } 490 491 static ssize_t writeback_limit_store(struct device *dev, 492 struct device_attribute *attr, const char *buf, size_t len) 493 { 494 struct zram *zram = dev_to_zram(dev); 495 u64 val; 496 ssize_t ret = -EINVAL; 497 498 if (kstrtoull(buf, 10, &val)) 499 return ret; 500 501 down_read(&zram->init_lock); 502 spin_lock(&zram->wb_limit_lock); 503 zram->bd_wb_limit = val; 504 spin_unlock(&zram->wb_limit_lock); 505 up_read(&zram->init_lock); 506 ret = len; 507 508 return ret; 509 } 510 511 static ssize_t writeback_limit_show(struct device *dev, 512 struct device_attribute *attr, char *buf) 513 { 514 u64 val; 515 struct zram *zram = dev_to_zram(dev); 516 517 down_read(&zram->init_lock); 518 spin_lock(&zram->wb_limit_lock); 519 val = zram->bd_wb_limit; 520 spin_unlock(&zram->wb_limit_lock); 521 up_read(&zram->init_lock); 522 523 return scnprintf(buf, PAGE_SIZE, "%llu\n", val); 524 } 525 526 static void reset_bdev(struct zram *zram) 527 { 528 if (!zram->backing_dev) 529 return; 530 531 /* hope filp_close flush all of IO */ 532 filp_close(zram->backing_dev, NULL); 533 zram->backing_dev = NULL; 534 zram->bdev = NULL; 535 zram->disk->fops = &zram_devops; 536 kvfree(zram->bitmap); 537 zram->bitmap = NULL; 538 } 539 540 static ssize_t backing_dev_show(struct device *dev, 541 struct device_attribute *attr, char *buf) 542 { 543 struct file *file; 544 struct zram *zram = dev_to_zram(dev); 545 char *p; 546 ssize_t ret; 547 548 down_read(&zram->init_lock); 549 file = zram->backing_dev; 550 if (!file) { 551 memcpy(buf, "none\n", 5); 552 up_read(&zram->init_lock); 553 return 5; 554 } 555 556 p = file_path(file, buf, PAGE_SIZE - 1); 557 if (IS_ERR(p)) { 558 ret = PTR_ERR(p); 559 goto out; 560 } 561 562 ret = strlen(p); 563 memmove(buf, p, ret); 564 buf[ret++] = '\n'; 565 out: 566 up_read(&zram->init_lock); 567 return ret; 568 } 569 570 static ssize_t backing_dev_store(struct device *dev, 571 struct device_attribute *attr, const char *buf, size_t len) 572 { 573 char *file_name; 574 size_t sz; 575 struct file *backing_dev = NULL; 576 struct inode *inode; 577 unsigned int bitmap_sz; 578 unsigned long nr_pages, *bitmap = NULL; 579 int err; 580 struct zram *zram = dev_to_zram(dev); 581 582 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 583 if (!file_name) 584 return -ENOMEM; 585 586 down_write(&zram->init_lock); 587 if (init_done(zram)) { 588 pr_info("Can't setup backing device for initialized device\n"); 589 err = -EBUSY; 590 goto out; 591 } 592 593 
strscpy(file_name, buf, PATH_MAX); 594 /* ignore trailing newline */ 595 sz = strlen(file_name); 596 if (sz > 0 && file_name[sz - 1] == '\n') 597 file_name[sz - 1] = 0x00; 598 599 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0); 600 if (IS_ERR(backing_dev)) { 601 err = PTR_ERR(backing_dev); 602 backing_dev = NULL; 603 goto out; 604 } 605 606 inode = backing_dev->f_mapping->host; 607 608 /* Support only block device in this moment */ 609 if (!S_ISBLK(inode->i_mode)) { 610 err = -ENOTBLK; 611 goto out; 612 } 613 614 nr_pages = i_size_read(inode) >> PAGE_SHIFT; 615 /* Refuse to use zero sized device (also prevents self reference) */ 616 if (!nr_pages) { 617 err = -EINVAL; 618 goto out; 619 } 620 621 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 622 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 623 if (!bitmap) { 624 err = -ENOMEM; 625 goto out; 626 } 627 628 reset_bdev(zram); 629 630 zram->bdev = I_BDEV(inode); 631 zram->backing_dev = backing_dev; 632 zram->bitmap = bitmap; 633 zram->nr_pages = nr_pages; 634 up_write(&zram->init_lock); 635 636 pr_info("setup backing device %s\n", file_name); 637 kfree(file_name); 638 639 return len; 640 out: 641 kvfree(bitmap); 642 643 if (backing_dev) 644 filp_close(backing_dev, NULL); 645 646 up_write(&zram->init_lock); 647 648 kfree(file_name); 649 650 return err; 651 } 652 653 static unsigned long alloc_block_bdev(struct zram *zram) 654 { 655 unsigned long blk_idx = 1; 656 retry: 657 /* skip 0 bit to confuse zram.handle = 0 */ 658 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); 659 if (blk_idx == zram->nr_pages) 660 return 0; 661 662 if (test_and_set_bit(blk_idx, zram->bitmap)) 663 goto retry; 664 665 atomic64_inc(&zram->stats.bd_count); 666 return blk_idx; 667 } 668 669 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) 670 { 671 int was_set; 672 673 was_set = test_and_clear_bit(blk_idx, zram->bitmap); 674 WARN_ON_ONCE(!was_set); 675 atomic64_dec(&zram->stats.bd_count); 676 } 677 678 static void read_from_bdev_async(struct zram *zram, struct page *page, 679 unsigned long entry, struct bio *parent) 680 { 681 struct bio *bio; 682 683 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); 684 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 685 __bio_add_page(bio, page, PAGE_SIZE, 0); 686 bio_chain(bio, parent); 687 submit_bio(bio); 688 } 689 690 #define PAGE_WB_SIG "page_index=" 691 692 #define PAGE_WRITEBACK 0 693 #define HUGE_WRITEBACK (1<<0) 694 #define IDLE_WRITEBACK (1<<1) 695 #define INCOMPRESSIBLE_WRITEBACK (1<<2) 696 697 static int scan_slots_for_writeback(struct zram *zram, u32 mode, 698 unsigned long nr_pages, 699 unsigned long index, 700 struct zram_pp_ctl *ctl) 701 { 702 struct zram_pp_slot *pps = NULL; 703 704 for (; nr_pages != 0; index++, nr_pages--) { 705 if (!pps) 706 pps = kmalloc(sizeof(*pps), GFP_KERNEL); 707 if (!pps) 708 return -ENOMEM; 709 710 INIT_LIST_HEAD(&pps->entry); 711 712 zram_slot_lock(zram, index); 713 if (!zram_allocated(zram, index)) 714 goto next; 715 716 if (zram_test_flag(zram, index, ZRAM_WB) || 717 zram_test_flag(zram, index, ZRAM_SAME)) 718 goto next; 719 720 if (mode & IDLE_WRITEBACK && 721 !zram_test_flag(zram, index, ZRAM_IDLE)) 722 goto next; 723 if (mode & HUGE_WRITEBACK && 724 !zram_test_flag(zram, index, ZRAM_HUGE)) 725 goto next; 726 if (mode & INCOMPRESSIBLE_WRITEBACK && 727 !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 728 goto next; 729 730 pps->index = index; 731 place_pp_slot(zram, ctl, pps); 732 pps = NULL; 733 next: 734 
zram_slot_unlock(zram, index); 735 } 736 737 kfree(pps); 738 return 0; 739 } 740 741 static ssize_t writeback_store(struct device *dev, 742 struct device_attribute *attr, const char *buf, size_t len) 743 { 744 struct zram *zram = dev_to_zram(dev); 745 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 746 struct zram_pp_ctl *ctl = NULL; 747 struct zram_pp_slot *pps; 748 unsigned long index = 0; 749 struct bio bio; 750 struct bio_vec bio_vec; 751 struct page *page; 752 ssize_t ret = len; 753 int mode, err; 754 unsigned long blk_idx = 0; 755 756 if (sysfs_streq(buf, "idle")) 757 mode = IDLE_WRITEBACK; 758 else if (sysfs_streq(buf, "huge")) 759 mode = HUGE_WRITEBACK; 760 else if (sysfs_streq(buf, "huge_idle")) 761 mode = IDLE_WRITEBACK | HUGE_WRITEBACK; 762 else if (sysfs_streq(buf, "incompressible")) 763 mode = INCOMPRESSIBLE_WRITEBACK; 764 else { 765 if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) 766 return -EINVAL; 767 768 if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || 769 index >= nr_pages) 770 return -EINVAL; 771 772 nr_pages = 1; 773 mode = PAGE_WRITEBACK; 774 } 775 776 down_read(&zram->init_lock); 777 if (!init_done(zram)) { 778 ret = -EINVAL; 779 goto release_init_lock; 780 } 781 782 /* Do not permit concurrent post-processing actions. */ 783 if (atomic_xchg(&zram->pp_in_progress, 1)) { 784 up_read(&zram->init_lock); 785 return -EAGAIN; 786 } 787 788 if (!zram->backing_dev) { 789 ret = -ENODEV; 790 goto release_init_lock; 791 } 792 793 page = alloc_page(GFP_KERNEL); 794 if (!page) { 795 ret = -ENOMEM; 796 goto release_init_lock; 797 } 798 799 ctl = init_pp_ctl(); 800 if (!ctl) { 801 ret = -ENOMEM; 802 goto release_init_lock; 803 } 804 805 scan_slots_for_writeback(zram, mode, nr_pages, index, ctl); 806 807 while ((pps = select_pp_slot(ctl))) { 808 spin_lock(&zram->wb_limit_lock); 809 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 810 spin_unlock(&zram->wb_limit_lock); 811 ret = -EIO; 812 break; 813 } 814 spin_unlock(&zram->wb_limit_lock); 815 816 if (!blk_idx) { 817 blk_idx = alloc_block_bdev(zram); 818 if (!blk_idx) { 819 ret = -ENOSPC; 820 break; 821 } 822 } 823 824 index = pps->index; 825 zram_slot_lock(zram, index); 826 /* 827 * scan_slots() sets ZRAM_PP_SLOT and relases slot lock, so 828 * slots can change in the meantime. If slots are accessed or 829 * freed they lose ZRAM_PP_SLOT flag and hence we don't 830 * post-process them. 831 */ 832 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) 833 goto next; 834 if (zram_read_from_zspool(zram, page, index)) 835 goto next; 836 zram_slot_unlock(zram, index); 837 838 bio_init(&bio, zram->bdev, &bio_vec, 1, 839 REQ_OP_WRITE | REQ_SYNC); 840 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 841 __bio_add_page(&bio, page, PAGE_SIZE, 0); 842 843 /* 844 * XXX: A single page IO would be inefficient for write 845 * but it would be not bad as starter. 846 */ 847 err = submit_bio_wait(&bio); 848 if (err) { 849 release_pp_slot(zram, pps); 850 /* 851 * BIO errors are not fatal, we continue and simply 852 * attempt to writeback the remaining objects (pages). 853 * At the same time we need to signal user-space that 854 * some writes (at least one, but also could be all of 855 * them) were not successful and we do so by returning 856 * the most recent BIO error. 
857 */ 858 ret = err; 859 continue; 860 } 861 862 atomic64_inc(&zram->stats.bd_writes); 863 zram_slot_lock(zram, index); 864 /* 865 * Same as above, we release slot lock during writeback so 866 * slot can change under us: slot_free() or slot_free() and 867 * reallocation (zram_write_page()). In both cases slot loses 868 * ZRAM_PP_SLOT flag. No concurrent post-processing can set 869 * ZRAM_PP_SLOT on such slots until current post-processing 870 * finishes. 871 */ 872 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) 873 goto next; 874 875 zram_free_page(zram, index); 876 zram_set_flag(zram, index, ZRAM_WB); 877 zram_set_handle(zram, index, blk_idx); 878 blk_idx = 0; 879 atomic64_inc(&zram->stats.pages_stored); 880 spin_lock(&zram->wb_limit_lock); 881 if (zram->wb_limit_enable && zram->bd_wb_limit > 0) 882 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); 883 spin_unlock(&zram->wb_limit_lock); 884 next: 885 zram_slot_unlock(zram, index); 886 release_pp_slot(zram, pps); 887 888 cond_resched(); 889 } 890 891 if (blk_idx) 892 free_block_bdev(zram, blk_idx); 893 __free_page(page); 894 release_init_lock: 895 release_pp_ctl(zram, ctl); 896 atomic_set(&zram->pp_in_progress, 0); 897 up_read(&zram->init_lock); 898 899 return ret; 900 } 901 902 struct zram_work { 903 struct work_struct work; 904 struct zram *zram; 905 unsigned long entry; 906 struct page *page; 907 int error; 908 }; 909 910 static void zram_sync_read(struct work_struct *work) 911 { 912 struct zram_work *zw = container_of(work, struct zram_work, work); 913 struct bio_vec bv; 914 struct bio bio; 915 916 bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); 917 bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); 918 __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); 919 zw->error = submit_bio_wait(&bio); 920 } 921 922 /* 923 * Block layer want one ->submit_bio to be active at a time, so if we use 924 * chained IO with parent IO in same context, it's a deadlock. To avoid that, 925 * use a worker thread context. 
926 */ 927 static int read_from_bdev_sync(struct zram *zram, struct page *page, 928 unsigned long entry) 929 { 930 struct zram_work work; 931 932 work.page = page; 933 work.zram = zram; 934 work.entry = entry; 935 936 INIT_WORK_ONSTACK(&work.work, zram_sync_read); 937 queue_work(system_unbound_wq, &work.work); 938 flush_work(&work.work); 939 destroy_work_on_stack(&work.work); 940 941 return work.error; 942 } 943 944 static int read_from_bdev(struct zram *zram, struct page *page, 945 unsigned long entry, struct bio *parent) 946 { 947 atomic64_inc(&zram->stats.bd_reads); 948 if (!parent) { 949 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) 950 return -EIO; 951 return read_from_bdev_sync(zram, page, entry); 952 } 953 read_from_bdev_async(zram, page, entry, parent); 954 return 0; 955 } 956 #else 957 static inline void reset_bdev(struct zram *zram) {}; 958 static int read_from_bdev(struct zram *zram, struct page *page, 959 unsigned long entry, struct bio *parent) 960 { 961 return -EIO; 962 } 963 964 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; 965 #endif 966 967 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 968 969 static struct dentry *zram_debugfs_root; 970 971 static void zram_debugfs_create(void) 972 { 973 zram_debugfs_root = debugfs_create_dir("zram", NULL); 974 } 975 976 static void zram_debugfs_destroy(void) 977 { 978 debugfs_remove_recursive(zram_debugfs_root); 979 } 980 981 static ssize_t read_block_state(struct file *file, char __user *buf, 982 size_t count, loff_t *ppos) 983 { 984 char *kbuf; 985 ssize_t index, written = 0; 986 struct zram *zram = file->private_data; 987 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 988 struct timespec64 ts; 989 990 kbuf = kvmalloc(count, GFP_KERNEL); 991 if (!kbuf) 992 return -ENOMEM; 993 994 down_read(&zram->init_lock); 995 if (!init_done(zram)) { 996 up_read(&zram->init_lock); 997 kvfree(kbuf); 998 return -EINVAL; 999 } 1000 1001 for (index = *ppos; index < nr_pages; index++) { 1002 int copied; 1003 1004 zram_slot_lock(zram, index); 1005 if (!zram_allocated(zram, index)) 1006 goto next; 1007 1008 ts = ktime_to_timespec64(zram->table[index].ac_time); 1009 copied = snprintf(kbuf + written, count, 1010 "%12zd %12lld.%06lu %c%c%c%c%c%c\n", 1011 index, (s64)ts.tv_sec, 1012 ts.tv_nsec / NSEC_PER_USEC, 1013 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', 1014 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', 1015 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', 1016 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', 1017 zram_get_priority(zram, index) ? 'r' : '.', 1018 zram_test_flag(zram, index, 1019 ZRAM_INCOMPRESSIBLE) ? 
'n' : '.'); 1020 1021 if (count <= copied) { 1022 zram_slot_unlock(zram, index); 1023 break; 1024 } 1025 written += copied; 1026 count -= copied; 1027 next: 1028 zram_slot_unlock(zram, index); 1029 *ppos += 1; 1030 } 1031 1032 up_read(&zram->init_lock); 1033 if (copy_to_user(buf, kbuf, written)) 1034 written = -EFAULT; 1035 kvfree(kbuf); 1036 1037 return written; 1038 } 1039 1040 static const struct file_operations proc_zram_block_state_op = { 1041 .open = simple_open, 1042 .read = read_block_state, 1043 .llseek = default_llseek, 1044 }; 1045 1046 static void zram_debugfs_register(struct zram *zram) 1047 { 1048 if (!zram_debugfs_root) 1049 return; 1050 1051 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, 1052 zram_debugfs_root); 1053 debugfs_create_file("block_state", 0400, zram->debugfs_dir, 1054 zram, &proc_zram_block_state_op); 1055 } 1056 1057 static void zram_debugfs_unregister(struct zram *zram) 1058 { 1059 debugfs_remove_recursive(zram->debugfs_dir); 1060 } 1061 #else 1062 static void zram_debugfs_create(void) {}; 1063 static void zram_debugfs_destroy(void) {}; 1064 static void zram_debugfs_register(struct zram *zram) {}; 1065 static void zram_debugfs_unregister(struct zram *zram) {}; 1066 #endif 1067 1068 /* 1069 * We switched to per-cpu streams and this attr is not needed anymore. 1070 * However, we will keep it around for some time, because: 1071 * a) we may revert per-cpu streams in the future 1072 * b) it's visible to user space and we need to follow our 2 years 1073 * retirement rule; but we already have a number of 'soon to be 1074 * altered' attrs, so max_comp_streams need to wait for the next 1075 * layoff cycle. 1076 */ 1077 static ssize_t max_comp_streams_show(struct device *dev, 1078 struct device_attribute *attr, char *buf) 1079 { 1080 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); 1081 } 1082 1083 static ssize_t max_comp_streams_store(struct device *dev, 1084 struct device_attribute *attr, const char *buf, size_t len) 1085 { 1086 return len; 1087 } 1088 1089 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) 1090 { 1091 /* Do not free statically defined compression algorithms */ 1092 if (zram->comp_algs[prio] != default_compressor) 1093 kfree(zram->comp_algs[prio]); 1094 1095 zram->comp_algs[prio] = alg; 1096 } 1097 1098 static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf) 1099 { 1100 ssize_t sz; 1101 1102 down_read(&zram->init_lock); 1103 sz = zcomp_available_show(zram->comp_algs[prio], buf); 1104 up_read(&zram->init_lock); 1105 1106 return sz; 1107 } 1108 1109 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) 1110 { 1111 char *compressor; 1112 size_t sz; 1113 1114 sz = strlen(buf); 1115 if (sz >= CRYPTO_MAX_ALG_NAME) 1116 return -E2BIG; 1117 1118 compressor = kstrdup(buf, GFP_KERNEL); 1119 if (!compressor) 1120 return -ENOMEM; 1121 1122 /* ignore trailing newline */ 1123 if (sz > 0 && compressor[sz - 1] == '\n') 1124 compressor[sz - 1] = 0x00; 1125 1126 if (!zcomp_available_algorithm(compressor)) { 1127 kfree(compressor); 1128 return -EINVAL; 1129 } 1130 1131 down_write(&zram->init_lock); 1132 if (init_done(zram)) { 1133 up_write(&zram->init_lock); 1134 kfree(compressor); 1135 pr_info("Can't change algorithm for initialized device\n"); 1136 return -EBUSY; 1137 } 1138 1139 comp_algorithm_set(zram, prio, compressor); 1140 up_write(&zram->init_lock); 1141 return 0; 1142 } 1143 1144 static void comp_params_reset(struct zram *zram, u32 prio) 1145 { 1146 struct 
zcomp_params *params = &zram->params[prio]; 1147 1148 vfree(params->dict); 1149 params->level = ZCOMP_PARAM_NO_LEVEL; 1150 params->dict_sz = 0; 1151 params->dict = NULL; 1152 } 1153 1154 static int comp_params_store(struct zram *zram, u32 prio, s32 level, 1155 const char *dict_path) 1156 { 1157 ssize_t sz = 0; 1158 1159 comp_params_reset(zram, prio); 1160 1161 if (dict_path) { 1162 sz = kernel_read_file_from_path(dict_path, 0, 1163 &zram->params[prio].dict, 1164 INT_MAX, 1165 NULL, 1166 READING_POLICY); 1167 if (sz < 0) 1168 return -EINVAL; 1169 } 1170 1171 zram->params[prio].dict_sz = sz; 1172 zram->params[prio].level = level; 1173 return 0; 1174 } 1175 1176 static ssize_t algorithm_params_store(struct device *dev, 1177 struct device_attribute *attr, 1178 const char *buf, 1179 size_t len) 1180 { 1181 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NO_LEVEL; 1182 char *args, *param, *val, *algo = NULL, *dict_path = NULL; 1183 struct zram *zram = dev_to_zram(dev); 1184 int ret; 1185 1186 args = skip_spaces(buf); 1187 while (*args) { 1188 args = next_arg(args, ¶m, &val); 1189 1190 if (!val || !*val) 1191 return -EINVAL; 1192 1193 if (!strcmp(param, "priority")) { 1194 ret = kstrtoint(val, 10, &prio); 1195 if (ret) 1196 return ret; 1197 continue; 1198 } 1199 1200 if (!strcmp(param, "level")) { 1201 ret = kstrtoint(val, 10, &level); 1202 if (ret) 1203 return ret; 1204 continue; 1205 } 1206 1207 if (!strcmp(param, "algo")) { 1208 algo = val; 1209 continue; 1210 } 1211 1212 if (!strcmp(param, "dict")) { 1213 dict_path = val; 1214 continue; 1215 } 1216 } 1217 1218 /* Lookup priority by algorithm name */ 1219 if (algo) { 1220 s32 p; 1221 1222 prio = -EINVAL; 1223 for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) { 1224 if (!zram->comp_algs[p]) 1225 continue; 1226 1227 if (!strcmp(zram->comp_algs[p], algo)) { 1228 prio = p; 1229 break; 1230 } 1231 } 1232 } 1233 1234 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS) 1235 return -EINVAL; 1236 1237 ret = comp_params_store(zram, prio, level, dict_path); 1238 return ret ? ret : len; 1239 } 1240 1241 static ssize_t comp_algorithm_show(struct device *dev, 1242 struct device_attribute *attr, 1243 char *buf) 1244 { 1245 struct zram *zram = dev_to_zram(dev); 1246 1247 return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf); 1248 } 1249 1250 static ssize_t comp_algorithm_store(struct device *dev, 1251 struct device_attribute *attr, 1252 const char *buf, 1253 size_t len) 1254 { 1255 struct zram *zram = dev_to_zram(dev); 1256 int ret; 1257 1258 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf); 1259 return ret ? 
ret : len; 1260 } 1261 1262 #ifdef CONFIG_ZRAM_MULTI_COMP 1263 static ssize_t recomp_algorithm_show(struct device *dev, 1264 struct device_attribute *attr, 1265 char *buf) 1266 { 1267 struct zram *zram = dev_to_zram(dev); 1268 ssize_t sz = 0; 1269 u32 prio; 1270 1271 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { 1272 if (!zram->comp_algs[prio]) 1273 continue; 1274 1275 sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio); 1276 sz += __comp_algorithm_show(zram, prio, buf + sz); 1277 } 1278 1279 return sz; 1280 } 1281 1282 static ssize_t recomp_algorithm_store(struct device *dev, 1283 struct device_attribute *attr, 1284 const char *buf, 1285 size_t len) 1286 { 1287 struct zram *zram = dev_to_zram(dev); 1288 int prio = ZRAM_SECONDARY_COMP; 1289 char *args, *param, *val; 1290 char *alg = NULL; 1291 int ret; 1292 1293 args = skip_spaces(buf); 1294 while (*args) { 1295 args = next_arg(args, ¶m, &val); 1296 1297 if (!val || !*val) 1298 return -EINVAL; 1299 1300 if (!strcmp(param, "algo")) { 1301 alg = val; 1302 continue; 1303 } 1304 1305 if (!strcmp(param, "priority")) { 1306 ret = kstrtoint(val, 10, &prio); 1307 if (ret) 1308 return ret; 1309 continue; 1310 } 1311 } 1312 1313 if (!alg) 1314 return -EINVAL; 1315 1316 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) 1317 return -EINVAL; 1318 1319 ret = __comp_algorithm_store(zram, prio, alg); 1320 return ret ? ret : len; 1321 } 1322 #endif 1323 1324 static ssize_t compact_store(struct device *dev, 1325 struct device_attribute *attr, const char *buf, size_t len) 1326 { 1327 struct zram *zram = dev_to_zram(dev); 1328 1329 down_read(&zram->init_lock); 1330 if (!init_done(zram)) { 1331 up_read(&zram->init_lock); 1332 return -EINVAL; 1333 } 1334 1335 zs_compact(zram->mem_pool); 1336 up_read(&zram->init_lock); 1337 1338 return len; 1339 } 1340 1341 static ssize_t io_stat_show(struct device *dev, 1342 struct device_attribute *attr, char *buf) 1343 { 1344 struct zram *zram = dev_to_zram(dev); 1345 ssize_t ret; 1346 1347 down_read(&zram->init_lock); 1348 ret = scnprintf(buf, PAGE_SIZE, 1349 "%8llu %8llu 0 %8llu\n", 1350 (u64)atomic64_read(&zram->stats.failed_reads), 1351 (u64)atomic64_read(&zram->stats.failed_writes), 1352 (u64)atomic64_read(&zram->stats.notify_free)); 1353 up_read(&zram->init_lock); 1354 1355 return ret; 1356 } 1357 1358 static ssize_t mm_stat_show(struct device *dev, 1359 struct device_attribute *attr, char *buf) 1360 { 1361 struct zram *zram = dev_to_zram(dev); 1362 struct zs_pool_stats pool_stats; 1363 u64 orig_size, mem_used = 0; 1364 long max_used; 1365 ssize_t ret; 1366 1367 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 1368 1369 down_read(&zram->init_lock); 1370 if (init_done(zram)) { 1371 mem_used = zs_get_total_pages(zram->mem_pool); 1372 zs_pool_stats(zram->mem_pool, &pool_stats); 1373 } 1374 1375 orig_size = atomic64_read(&zram->stats.pages_stored); 1376 max_used = atomic_long_read(&zram->stats.max_used_pages); 1377 1378 ret = scnprintf(buf, PAGE_SIZE, 1379 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", 1380 orig_size << PAGE_SHIFT, 1381 (u64)atomic64_read(&zram->stats.compr_data_size), 1382 mem_used << PAGE_SHIFT, 1383 zram->limit_pages << PAGE_SHIFT, 1384 max_used << PAGE_SHIFT, 1385 (u64)atomic64_read(&zram->stats.same_pages), 1386 atomic_long_read(&pool_stats.pages_compacted), 1387 (u64)atomic64_read(&zram->stats.huge_pages), 1388 (u64)atomic64_read(&zram->stats.huge_pages_since)); 1389 up_read(&zram->init_lock); 1390 1391 return ret; 1392 } 1393 1394 #ifdef 
CONFIG_ZRAM_WRITEBACK 1395 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 1396 static ssize_t bd_stat_show(struct device *dev, 1397 struct device_attribute *attr, char *buf) 1398 { 1399 struct zram *zram = dev_to_zram(dev); 1400 ssize_t ret; 1401 1402 down_read(&zram->init_lock); 1403 ret = scnprintf(buf, PAGE_SIZE, 1404 "%8llu %8llu %8llu\n", 1405 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 1406 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), 1407 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); 1408 up_read(&zram->init_lock); 1409 1410 return ret; 1411 } 1412 #endif 1413 1414 static ssize_t debug_stat_show(struct device *dev, 1415 struct device_attribute *attr, char *buf) 1416 { 1417 int version = 1; 1418 struct zram *zram = dev_to_zram(dev); 1419 ssize_t ret; 1420 1421 down_read(&zram->init_lock); 1422 ret = scnprintf(buf, PAGE_SIZE, 1423 "version: %d\n%8llu %8llu\n", 1424 version, 1425 (u64)atomic64_read(&zram->stats.writestall), 1426 (u64)atomic64_read(&zram->stats.miss_free)); 1427 up_read(&zram->init_lock); 1428 1429 return ret; 1430 } 1431 1432 static DEVICE_ATTR_RO(io_stat); 1433 static DEVICE_ATTR_RO(mm_stat); 1434 #ifdef CONFIG_ZRAM_WRITEBACK 1435 static DEVICE_ATTR_RO(bd_stat); 1436 #endif 1437 static DEVICE_ATTR_RO(debug_stat); 1438 1439 static void zram_meta_free(struct zram *zram, u64 disksize) 1440 { 1441 size_t num_pages = disksize >> PAGE_SHIFT; 1442 size_t index; 1443 1444 if (!zram->table) 1445 return; 1446 1447 /* Free all pages that are still in this zram device */ 1448 for (index = 0; index < num_pages; index++) 1449 zram_free_page(zram, index); 1450 1451 zs_destroy_pool(zram->mem_pool); 1452 vfree(zram->table); 1453 zram->table = NULL; 1454 } 1455 1456 static bool zram_meta_alloc(struct zram *zram, u64 disksize) 1457 { 1458 size_t num_pages, index; 1459 1460 num_pages = disksize >> PAGE_SHIFT; 1461 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); 1462 if (!zram->table) 1463 return false; 1464 1465 zram->mem_pool = zs_create_pool(zram->disk->disk_name); 1466 if (!zram->mem_pool) { 1467 vfree(zram->table); 1468 zram->table = NULL; 1469 return false; 1470 } 1471 1472 if (!huge_class_size) 1473 huge_class_size = zs_huge_class_size(zram->mem_pool); 1474 1475 for (index = 0; index < num_pages; index++) 1476 spin_lock_init(&zram->table[index].lock); 1477 return true; 1478 } 1479 1480 /* 1481 * To protect concurrent access to the same index entry, 1482 * caller should hold this table index entry's bit_spinlock to 1483 * indicate this index entry is accessing. 1484 */ 1485 static void zram_free_page(struct zram *zram, size_t index) 1486 { 1487 unsigned long handle; 1488 1489 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME 1490 zram->table[index].ac_time = 0; 1491 #endif 1492 1493 zram_clear_flag(zram, index, ZRAM_IDLE); 1494 zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE); 1495 zram_clear_flag(zram, index, ZRAM_PP_SLOT); 1496 zram_set_priority(zram, index, 0); 1497 1498 if (zram_test_flag(zram, index, ZRAM_HUGE)) { 1499 zram_clear_flag(zram, index, ZRAM_HUGE); 1500 atomic64_dec(&zram->stats.huge_pages); 1501 } 1502 1503 if (zram_test_flag(zram, index, ZRAM_WB)) { 1504 zram_clear_flag(zram, index, ZRAM_WB); 1505 free_block_bdev(zram, zram_get_handle(zram, index)); 1506 goto out; 1507 } 1508 1509 /* 1510 * No memory is allocated for same element filled pages. 1511 * Simply clear same page flag. 
1512 */ 1513 if (zram_test_flag(zram, index, ZRAM_SAME)) { 1514 zram_clear_flag(zram, index, ZRAM_SAME); 1515 atomic64_dec(&zram->stats.same_pages); 1516 goto out; 1517 } 1518 1519 handle = zram_get_handle(zram, index); 1520 if (!handle) 1521 return; 1522 1523 zs_free(zram->mem_pool, handle); 1524 1525 atomic64_sub(zram_get_obj_size(zram, index), 1526 &zram->stats.compr_data_size); 1527 out: 1528 atomic64_dec(&zram->stats.pages_stored); 1529 zram_set_handle(zram, index, 0); 1530 zram_set_obj_size(zram, index, 0); 1531 } 1532 1533 static int read_same_filled_page(struct zram *zram, struct page *page, 1534 u32 index) 1535 { 1536 void *mem; 1537 1538 mem = kmap_local_page(page); 1539 zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index)); 1540 kunmap_local(mem); 1541 return 0; 1542 } 1543 1544 static int read_incompressible_page(struct zram *zram, struct page *page, 1545 u32 index) 1546 { 1547 unsigned long handle; 1548 void *src, *dst; 1549 1550 handle = zram_get_handle(zram, index); 1551 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); 1552 dst = kmap_local_page(page); 1553 copy_page(dst, src); 1554 kunmap_local(dst); 1555 zs_unmap_object(zram->mem_pool, handle); 1556 1557 return 0; 1558 } 1559 1560 static int read_compressed_page(struct zram *zram, struct page *page, u32 index) 1561 { 1562 struct zcomp_strm *zstrm; 1563 unsigned long handle; 1564 unsigned int size; 1565 void *src, *dst; 1566 int ret, prio; 1567 1568 handle = zram_get_handle(zram, index); 1569 size = zram_get_obj_size(zram, index); 1570 prio = zram_get_priority(zram, index); 1571 1572 zstrm = zcomp_stream_get(zram->comps[prio]); 1573 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); 1574 dst = kmap_local_page(page); 1575 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst); 1576 kunmap_local(dst); 1577 zs_unmap_object(zram->mem_pool, handle); 1578 zcomp_stream_put(zram->comps[prio]); 1579 1580 return ret; 1581 } 1582 1583 /* 1584 * Reads (decompresses if needed) a page from zspool (zsmalloc). 1585 * Corresponding ZRAM slot should be locked. 1586 */ 1587 static int zram_read_from_zspool(struct zram *zram, struct page *page, 1588 u32 index) 1589 { 1590 if (zram_test_flag(zram, index, ZRAM_SAME) || 1591 !zram_get_handle(zram, index)) 1592 return read_same_filled_page(zram, page, index); 1593 1594 if (!zram_test_flag(zram, index, ZRAM_HUGE)) 1595 return read_compressed_page(zram, page, index); 1596 else 1597 return read_incompressible_page(zram, page, index); 1598 } 1599 1600 static int zram_read_page(struct zram *zram, struct page *page, u32 index, 1601 struct bio *parent) 1602 { 1603 int ret; 1604 1605 zram_slot_lock(zram, index); 1606 if (!zram_test_flag(zram, index, ZRAM_WB)) { 1607 /* Slot should be locked through out the function call */ 1608 ret = zram_read_from_zspool(zram, page, index); 1609 zram_slot_unlock(zram, index); 1610 } else { 1611 /* 1612 * The slot should be unlocked before reading from the backing 1613 * device. 1614 */ 1615 zram_slot_unlock(zram, index); 1616 1617 ret = read_from_bdev(zram, page, zram_get_handle(zram, index), 1618 parent); 1619 } 1620 1621 /* Should NEVER happen. Return bio error if it does. */ 1622 if (WARN_ON(ret < 0)) 1623 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 1624 1625 return ret; 1626 } 1627 1628 /* 1629 * Use a temporary buffer to decompress the page, as the decompressor 1630 * always expects a full page for the output. 
1631 */ 1632 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec, 1633 u32 index, int offset) 1634 { 1635 struct page *page = alloc_page(GFP_NOIO); 1636 int ret; 1637 1638 if (!page) 1639 return -ENOMEM; 1640 ret = zram_read_page(zram, page, index, NULL); 1641 if (likely(!ret)) 1642 memcpy_to_bvec(bvec, page_address(page) + offset); 1643 __free_page(page); 1644 return ret; 1645 } 1646 1647 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 1648 u32 index, int offset, struct bio *bio) 1649 { 1650 if (is_partial_io(bvec)) 1651 return zram_bvec_read_partial(zram, bvec, index, offset); 1652 return zram_read_page(zram, bvec->bv_page, index, bio); 1653 } 1654 1655 static int write_same_filled_page(struct zram *zram, unsigned long fill, 1656 u32 index) 1657 { 1658 zram_slot_lock(zram, index); 1659 zram_set_flag(zram, index, ZRAM_SAME); 1660 zram_set_handle(zram, index, fill); 1661 zram_slot_unlock(zram, index); 1662 1663 atomic64_inc(&zram->stats.same_pages); 1664 atomic64_inc(&zram->stats.pages_stored); 1665 1666 return 0; 1667 } 1668 1669 static int write_incompressible_page(struct zram *zram, struct page *page, 1670 u32 index) 1671 { 1672 unsigned long handle; 1673 void *src, *dst; 1674 1675 /* 1676 * This function is called from preemptible context so we don't need 1677 * to do optimistic and fallback to pessimistic handle allocation, 1678 * like we do for compressible pages. 1679 */ 1680 handle = zs_malloc(zram->mem_pool, PAGE_SIZE, 1681 GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE); 1682 if (IS_ERR_VALUE(handle)) 1683 return PTR_ERR((void *)handle); 1684 1685 if (!zram_can_store_page(zram)) { 1686 zs_free(zram->mem_pool, handle); 1687 return -ENOMEM; 1688 } 1689 1690 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); 1691 src = kmap_local_page(page); 1692 memcpy(dst, src, PAGE_SIZE); 1693 kunmap_local(src); 1694 zs_unmap_object(zram->mem_pool, handle); 1695 1696 zram_slot_lock(zram, index); 1697 zram_set_flag(zram, index, ZRAM_HUGE); 1698 zram_set_handle(zram, index, handle); 1699 zram_set_obj_size(zram, index, PAGE_SIZE); 1700 zram_slot_unlock(zram, index); 1701 1702 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size); 1703 atomic64_inc(&zram->stats.huge_pages); 1704 atomic64_inc(&zram->stats.huge_pages_since); 1705 atomic64_inc(&zram->stats.pages_stored); 1706 1707 return 0; 1708 } 1709 1710 static int zram_write_page(struct zram *zram, struct page *page, u32 index) 1711 { 1712 int ret = 0; 1713 unsigned long handle = -ENOMEM; 1714 unsigned int comp_len = 0; 1715 void *dst, *mem; 1716 struct zcomp_strm *zstrm; 1717 unsigned long element = 0; 1718 bool same_filled; 1719 1720 /* First, free memory allocated to this slot (if any) */ 1721 zram_slot_lock(zram, index); 1722 zram_free_page(zram, index); 1723 zram_slot_unlock(zram, index); 1724 1725 mem = kmap_local_page(page); 1726 same_filled = page_same_filled(mem, &element); 1727 kunmap_local(mem); 1728 if (same_filled) 1729 return write_same_filled_page(zram, element, index); 1730 1731 compress_again: 1732 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); 1733 mem = kmap_local_page(page); 1734 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm, 1735 mem, &comp_len); 1736 kunmap_local(mem); 1737 1738 if (unlikely(ret)) { 1739 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); 1740 pr_err("Compression failed! 
err=%d\n", ret); 1741 zs_free(zram->mem_pool, handle); 1742 return ret; 1743 } 1744 1745 if (comp_len >= huge_class_size) { 1746 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); 1747 return write_incompressible_page(zram, page, index); 1748 } 1749 1750 /* 1751 * handle allocation has 2 paths: 1752 * a) fast path is executed with preemption disabled (for 1753 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, 1754 * since we can't sleep; 1755 * b) slow path enables preemption and attempts to allocate 1756 * the page with __GFP_DIRECT_RECLAIM bit set. we have to 1757 * put per-cpu compression stream and, thus, to re-do 1758 * the compression once handle is allocated. 1759 * 1760 * if we have a 'non-null' handle here then we are coming 1761 * from the slow path and handle has already been allocated. 1762 */ 1763 if (IS_ERR_VALUE(handle)) 1764 handle = zs_malloc(zram->mem_pool, comp_len, 1765 __GFP_KSWAPD_RECLAIM | 1766 __GFP_NOWARN | 1767 __GFP_HIGHMEM | 1768 __GFP_MOVABLE); 1769 if (IS_ERR_VALUE(handle)) { 1770 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); 1771 atomic64_inc(&zram->stats.writestall); 1772 handle = zs_malloc(zram->mem_pool, comp_len, 1773 GFP_NOIO | __GFP_HIGHMEM | 1774 __GFP_MOVABLE); 1775 if (IS_ERR_VALUE(handle)) 1776 return PTR_ERR((void *)handle); 1777 1778 goto compress_again; 1779 } 1780 1781 if (!zram_can_store_page(zram)) { 1782 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); 1783 zs_free(zram->mem_pool, handle); 1784 return -ENOMEM; 1785 } 1786 1787 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); 1788 1789 memcpy(dst, zstrm->buffer, comp_len); 1790 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); 1791 zs_unmap_object(zram->mem_pool, handle); 1792 1793 zram_slot_lock(zram, index); 1794 zram_set_handle(zram, index, handle); 1795 zram_set_obj_size(zram, index, comp_len); 1796 zram_slot_unlock(zram, index); 1797 1798 /* Update stats */ 1799 atomic64_inc(&zram->stats.pages_stored); 1800 atomic64_add(comp_len, &zram->stats.compr_data_size); 1801 1802 return ret; 1803 } 1804 1805 /* 1806 * This is a partial IO. Read the full page before writing the changes. 
1807 */ 1808 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, 1809 u32 index, int offset, struct bio *bio) 1810 { 1811 struct page *page = alloc_page(GFP_NOIO); 1812 int ret; 1813 1814 if (!page) 1815 return -ENOMEM; 1816 1817 ret = zram_read_page(zram, page, index, bio); 1818 if (!ret) { 1819 memcpy_from_bvec(page_address(page) + offset, bvec); 1820 ret = zram_write_page(zram, page, index); 1821 } 1822 __free_page(page); 1823 return ret; 1824 } 1825 1826 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 1827 u32 index, int offset, struct bio *bio) 1828 { 1829 if (is_partial_io(bvec)) 1830 return zram_bvec_write_partial(zram, bvec, index, offset, bio); 1831 return zram_write_page(zram, bvec->bv_page, index); 1832 } 1833 1834 #ifdef CONFIG_ZRAM_MULTI_COMP 1835 #define RECOMPRESS_IDLE (1 << 0) 1836 #define RECOMPRESS_HUGE (1 << 1) 1837 1838 static int scan_slots_for_recompress(struct zram *zram, u32 mode, 1839 struct zram_pp_ctl *ctl) 1840 { 1841 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 1842 struct zram_pp_slot *pps = NULL; 1843 unsigned long index; 1844 1845 for (index = 0; index < nr_pages; index++) { 1846 if (!pps) 1847 pps = kmalloc(sizeof(*pps), GFP_KERNEL); 1848 if (!pps) 1849 return -ENOMEM; 1850 1851 INIT_LIST_HEAD(&pps->entry); 1852 1853 zram_slot_lock(zram, index); 1854 if (!zram_allocated(zram, index)) 1855 goto next; 1856 1857 if (mode & RECOMPRESS_IDLE && 1858 !zram_test_flag(zram, index, ZRAM_IDLE)) 1859 goto next; 1860 1861 if (mode & RECOMPRESS_HUGE && 1862 !zram_test_flag(zram, index, ZRAM_HUGE)) 1863 goto next; 1864 1865 if (zram_test_flag(zram, index, ZRAM_WB) || 1866 zram_test_flag(zram, index, ZRAM_SAME) || 1867 zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) 1868 goto next; 1869 1870 pps->index = index; 1871 place_pp_slot(zram, ctl, pps); 1872 pps = NULL; 1873 next: 1874 zram_slot_unlock(zram, index); 1875 } 1876 1877 kfree(pps); 1878 return 0; 1879 } 1880 1881 /* 1882 * This function will decompress (unless it's ZRAM_HUGE) the page and then 1883 * attempt to compress it using provided compression algorithm priority 1884 * (which is potentially more effective). 1885 * 1886 * Corresponding ZRAM slot should be locked. 1887 */ 1888 static int recompress_slot(struct zram *zram, u32 index, struct page *page, 1889 u64 *num_recomp_pages, u32 threshold, u32 prio, 1890 u32 prio_max) 1891 { 1892 struct zcomp_strm *zstrm = NULL; 1893 unsigned long handle_old; 1894 unsigned long handle_new; 1895 unsigned int comp_len_old; 1896 unsigned int comp_len_new; 1897 unsigned int class_index_old; 1898 unsigned int class_index_new; 1899 u32 num_recomps = 0; 1900 void *src, *dst; 1901 int ret; 1902 1903 handle_old = zram_get_handle(zram, index); 1904 if (!handle_old) 1905 return -EINVAL; 1906 1907 comp_len_old = zram_get_obj_size(zram, index); 1908 /* 1909 * Do not recompress objects that are already "small enough". 1910 */ 1911 if (comp_len_old < threshold) 1912 return 0; 1913 1914 ret = zram_read_from_zspool(zram, page, index); 1915 if (ret) 1916 return ret; 1917 1918 /* 1919 * We touched this entry so mark it as non-IDLE. This makes sure that 1920 * we don't preserve IDLE flag and don't incorrectly pick this entry 1921 * for different post-processing type (e.g. writeback). 1922 */ 1923 zram_clear_flag(zram, index, ZRAM_IDLE); 1924 1925 class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); 1926 /* 1927 * Iterate the secondary comp algorithms list (in order of priority) 1928 * and try to recompress the page. 
1929 */ 1930 for (; prio < prio_max; prio++) { 1931 if (!zram->comps[prio]) 1932 continue; 1933 1934 /* 1935 * Skip if the object is already re-compressed with a higher 1936 * priority algorithm (or same algorithm). 1937 */ 1938 if (prio <= zram_get_priority(zram, index)) 1939 continue; 1940 1941 num_recomps++; 1942 zstrm = zcomp_stream_get(zram->comps[prio]); 1943 src = kmap_local_page(page); 1944 ret = zcomp_compress(zram->comps[prio], zstrm, 1945 src, &comp_len_new); 1946 kunmap_local(src); 1947 1948 if (ret) { 1949 zcomp_stream_put(zram->comps[prio]); 1950 return ret; 1951 } 1952 1953 class_index_new = zs_lookup_class_index(zram->mem_pool, 1954 comp_len_new); 1955 1956 /* Continue until we make progress */ 1957 if (class_index_new >= class_index_old || 1958 (threshold && comp_len_new >= threshold)) { 1959 zcomp_stream_put(zram->comps[prio]); 1960 continue; 1961 } 1962 1963 /* Recompression was successful so break out */ 1964 break; 1965 } 1966 1967 /* 1968 * We did not try to recompress, e.g. when we have only one 1969 * secondary algorithm and the page is already recompressed 1970 * using that algorithm 1971 */ 1972 if (!zstrm) 1973 return 0; 1974 1975 /* 1976 * Decrement the limit (if set) on pages we can recompress, even 1977 * when current recompression was unsuccessful or did not compress 1978 * the page below the threshold, because we still spent resources 1979 * on it. 1980 */ 1981 if (*num_recomp_pages) 1982 *num_recomp_pages -= 1; 1983 1984 if (class_index_new >= class_index_old) { 1985 /* 1986 * Secondary algorithms failed to re-compress the page 1987 * in a way that would save memory, mark the object as 1988 * incompressible so that we will not try to compress 1989 * it again. 1990 * 1991 * We need to make sure that all secondary algorithms have 1992 * failed, so we test if the number of recompressions matches 1993 * the number of active secondary algorithms. 1994 */ 1995 if (num_recomps == zram->num_active_comps - 1) 1996 zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); 1997 return 0; 1998 } 1999 2000 /* Successful recompression but above threshold */ 2001 if (threshold && comp_len_new >= threshold) 2002 return 0; 2003 2004 /* 2005 * No direct reclaim (slow path) for handle allocation and no 2006 * re-compression attempt (unlike in zram_write_bvec()) since 2007 * we already have stored that object in zsmalloc. If we cannot 2008 * alloc memory for recompressed object then we bail out and 2009 * simply keep the old (existing) object in zsmalloc. 
2010 */ 2011 handle_new = zs_malloc(zram->mem_pool, comp_len_new, 2012 __GFP_KSWAPD_RECLAIM | 2013 __GFP_NOWARN | 2014 __GFP_HIGHMEM | 2015 __GFP_MOVABLE); 2016 if (IS_ERR_VALUE(handle_new)) { 2017 zcomp_stream_put(zram->comps[prio]); 2018 return PTR_ERR((void *)handle_new); 2019 } 2020 2021 dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO); 2022 memcpy(dst, zstrm->buffer, comp_len_new); 2023 zcomp_stream_put(zram->comps[prio]); 2024 2025 zs_unmap_object(zram->mem_pool, handle_new); 2026 2027 zram_free_page(zram, index); 2028 zram_set_handle(zram, index, handle_new); 2029 zram_set_obj_size(zram, index, comp_len_new); 2030 zram_set_priority(zram, index, prio); 2031 2032 atomic64_add(comp_len_new, &zram->stats.compr_data_size); 2033 atomic64_inc(&zram->stats.pages_stored); 2034 2035 return 0; 2036 } 2037 2038 static ssize_t recompress_store(struct device *dev, 2039 struct device_attribute *attr, 2040 const char *buf, size_t len) 2041 { 2042 u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS; 2043 struct zram *zram = dev_to_zram(dev); 2044 char *args, *param, *val, *algo = NULL; 2045 u64 num_recomp_pages = ULLONG_MAX; 2046 struct zram_pp_ctl *ctl = NULL; 2047 struct zram_pp_slot *pps; 2048 u32 mode = 0, threshold = 0; 2049 struct page *page; 2050 ssize_t ret; 2051 2052 args = skip_spaces(buf); 2053 while (*args) { 2054 args = next_arg(args, ¶m, &val); 2055 2056 if (!val || !*val) 2057 return -EINVAL; 2058 2059 if (!strcmp(param, "type")) { 2060 if (!strcmp(val, "idle")) 2061 mode = RECOMPRESS_IDLE; 2062 if (!strcmp(val, "huge")) 2063 mode = RECOMPRESS_HUGE; 2064 if (!strcmp(val, "huge_idle")) 2065 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE; 2066 continue; 2067 } 2068 2069 if (!strcmp(param, "max_pages")) { 2070 /* 2071 * Limit the number of entries (pages) we attempt to 2072 * recompress. 2073 */ 2074 ret = kstrtoull(val, 10, &num_recomp_pages); 2075 if (ret) 2076 return ret; 2077 continue; 2078 } 2079 2080 if (!strcmp(param, "threshold")) { 2081 /* 2082 * We will re-compress only idle objects equal or 2083 * greater in size than watermark. 2084 */ 2085 ret = kstrtouint(val, 10, &threshold); 2086 if (ret) 2087 return ret; 2088 continue; 2089 } 2090 2091 if (!strcmp(param, "algo")) { 2092 algo = val; 2093 continue; 2094 } 2095 2096 if (!strcmp(param, "priority")) { 2097 ret = kstrtouint(val, 10, &prio); 2098 if (ret) 2099 return ret; 2100 2101 if (prio == ZRAM_PRIMARY_COMP) 2102 prio = ZRAM_SECONDARY_COMP; 2103 2104 prio_max = min(prio + 1, ZRAM_MAX_COMPS); 2105 continue; 2106 } 2107 } 2108 2109 if (threshold >= huge_class_size) 2110 return -EINVAL; 2111 2112 down_read(&zram->init_lock); 2113 if (!init_done(zram)) { 2114 ret = -EINVAL; 2115 goto release_init_lock; 2116 } 2117 2118 /* Do not permit concurrent post-processing actions. 
	if (atomic_xchg(&zram->pp_in_progress, 1)) {
		up_read(&zram->init_lock);
		return -EAGAIN;
	}

	if (algo) {
		bool found = false;

		for (; prio < ZRAM_MAX_COMPS; prio++) {
			if (!zram->comp_algs[prio])
				continue;

			if (!strcmp(zram->comp_algs[prio], algo)) {
				prio_max = min(prio + 1, ZRAM_MAX_COMPS);
				found = true;
				break;
			}
		}

		if (!found) {
			ret = -EINVAL;
			goto release_init_lock;
		}
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	ctl = init_pp_ctl();
	if (!ctl) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	scan_slots_for_recompress(zram, mode, ctl);

	ret = len;
	while ((pps = select_pp_slot(ctl))) {
		int err = 0;

		if (!num_recomp_pages)
			break;

		zram_slot_lock(zram, pps->index);
		if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
			goto next;

		err = recompress_slot(zram, pps->index, page,
				      &num_recomp_pages, threshold,
				      prio, prio_max);
next:
		zram_slot_unlock(zram, pps->index);
		release_pp_slot(zram, pps);

		if (err) {
			ret = err;
			break;
		}

		cond_resched();
	}

	__free_page(page);

release_init_lock:
	release_pp_ctl(zram, ctl);
	atomic_set(&zram->pp_in_progress, 0);
	up_read(&zram->init_lock);
	return ret;
}
#endif
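/*
 * Example usage of the recompress attribute handled above (illustrative;
 * the device name zram0 and the algorithm name are placeholders):
 *
 *   # try the registered secondary algorithms, in priority order, on idle
 *   # huge pages, but only on objects of at least 3000 bytes
 *   echo "type=huge_idle threshold=3000" > /sys/block/zram0/recompress
 *
 *   # recompress at most 1000 idle pages with one specific algorithm
 *   echo "type=idle max_pages=1000 algo=zstd" > /sys/block/zram0/recompress
 */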
static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;

	/*
	 * zram manages data in physical block size units. Because the logical
	 * block size isn't identical with the physical block size on some
	 * architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we can
	 * handle this request by reading that physical block, decompressing,
	 * partially zeroing, re-compressing and then re-storing it, this
	 * isn't reasonable because our intent with a discard request is to
	 * save memory. So skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}

	bio_endio(bio);
}

static void zram_bio_read(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_reads);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}
		flush_dcache_page(bv.bv_page);

		zram_slot_lock(zram, index);
		zram_accessed(zram, index);
		zram_slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

static void zram_bio_write(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_writes);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}

		zram_slot_lock(zram, index);
		zram_accessed(zram, index);
		zram_slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}
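/*
 * Worked example of the sector-to-slot mapping used above (assuming 4 KiB
 * pages and 512-byte sectors, i.e. SECTORS_PER_PAGE == 8 and
 * SECTORS_PER_PAGE_SHIFT == 3): a bio starting at sector 9 maps to
 * index = 9 >> 3 = 1 and offset = (9 & 7) << 9 = 512, so the first bvec
 * covers at most PAGE_SIZE - 512 bytes of slot 1 before the iterator
 * advances to slot 2 at offset 0.
 */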
/*
 * Handler function for all zram I/O requests.
 */
static void zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_bdev->bd_disk->private_data;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		zram_bio_read(zram, bio);
		break;
	case REQ_OP_WRITE:
		zram_bio_write(zram, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, bio);
		break;
	default:
		WARN_ON_ONCE(1);
		bio_endio(bio);
	}
}

static void zram_slot_free_notify(struct block_device *bdev,
				  unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}

static void zram_comp_params_reset(struct zram *zram)
{
	u32 prio;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		comp_params_reset(zram, prio);
	}
}

static void zram_destroy_comps(struct zram *zram)
{
	u32 prio;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		struct zcomp *comp = zram->comps[prio];

		zram->comps[prio] = NULL;
		if (!comp)
			continue;
		zcomp_destroy(comp);
		zram->num_active_comps--;
	}

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		/* Do not free statically defined compression algorithms */
		if (zram->comp_algs[prio] != default_compressor)
			kfree(zram->comp_algs[prio]);
		zram->comp_algs[prio] = NULL;
	}

	zram_comp_params_reset(zram);
}

static void zram_reset_device(struct zram *zram)
{
	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	/* All pending I/O on all CPUs has finished, so it is safe to free */
	zram_meta_free(zram, zram->disksize);
	zram->disksize = 0;
	zram_destroy_comps(zram);
	memset(&zram->stats, 0, sizeof(zram->stats));
	atomic_set(&zram->pp_in_progress, 0);
	reset_bdev(zram);

	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
	up_write(&zram->init_lock);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;
	u32 prio;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		comp = zcomp_create(zram->comp_algs[prio],
				    &zram->params[prio]);
		if (IS_ERR(comp)) {
			pr_err("Cannot initialise %s compressing backend\n",
			       zram->comp_algs[prio]);
			err = PTR_ERR(comp);
			goto out_free_comps;
		}

		zram->comps[prio] = comp;
		zram->num_active_comps++;
	}
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	return len;

out_free_comps:
	zram_destroy_comps(zram);
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}
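/*
 * Example setup sequence (illustrative; the device name, size and
 * algorithm are placeholders): the disksize attribute accepts memparse()
 * suffixes and must be written before the device is used, e.g.:
 *
 *   echo zstd > /sys/block/zram0/comp_algorithm
 *   echo 2G > /sys/block/zram0/disksize
 *   mkswap /dev/zram0 && swapon /dev/zram0
 */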
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct gendisk *disk;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	disk = zram->disk;

	mutex_lock(&disk->open_mutex);
	/* Do not reset an active or claimed device */
	if (disk_openers(disk) || zram->claim) {
		mutex_unlock(&disk->open_mutex);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&disk->open_mutex);

	/* Make sure all the pending I/O is finished */
	sync_blockdev(disk->part0);
	zram_reset_device(zram);

	mutex_lock(&disk->open_mutex);
	zram->claim = false;
	mutex_unlock(&disk->open_mutex);

	return len;
}

static int zram_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zram *zram = disk->private_data;

	WARN_ON(!mutex_is_locked(&disk->open_mutex));

	/* zram was claimed for reset, so fail the open request */
	if (zram->claim)
		return -EBUSY;
	return 0;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
static DEVICE_ATTR_WO(recompress);
#endif
static DEVICE_ATTR_WO(algorithm_params);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
	&dev_attr_recomp_algorithm.attr,
	&dev_attr_recompress.attr,
#endif
	&dev_attr_algorithm_params.attr,
	NULL,
};

ATTRIBUTE_GROUPS(zram_disk);
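/*
 * The attributes above appear under /sys/block/zram<id>/. For example, a
 * device can be reset (illustrative; it must not be in use, otherwise
 * reset_store() returns -EBUSY) with:
 *
 *   swapoff /dev/zram0            # or umount, if used as a block device
 *   echo 1 > /sys/block/zram0/reset
 *
 * After the reset the device is back in the uninitialized state and a new
 * disksize can be written.
 */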
/*
 * Allocate and initialize a new zram device. Returns the device_id
 * ('>= 0') on success, or a negative error value otherwise.
 */
static int zram_add(void)
{
	struct queue_limits lim = {
		.logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
		/*
		 * To ensure that we always get PAGE_SIZE-aligned and
		 * n*PAGE_SIZE-sized I/O requests.
		 */
		.physical_block_size = PAGE_SIZE,
		.io_min = PAGE_SIZE,
		.io_opt = PAGE_SIZE,
		.max_hw_discard_sectors = UINT_MAX,
		/*
		 * zram_bio_discard() will clear all logical blocks if the
		 * logical block size is identical with the physical block
		 * size (PAGE_SIZE). But if it is different, we will skip
		 * discarding some parts of logical blocks in the part of the
		 * request range which isn't aligned to the physical block
		 * size. So we can't ensure that all discarded logical blocks
		 * are zeroed.
		 */
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
		.max_write_zeroes_sectors = UINT_MAX,
#endif
		.features = BLK_FEAT_STABLE_WRITES |
			    BLK_FEAT_SYNCHRONOUS,
	};
	struct zram *zram;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	spin_lock_init(&zram->wb_limit_lock);
#endif

	/* gendisk structure */
	zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
	if (IS_ERR(zram->disk)) {
		pr_err("Error allocating disk structure for device %d\n",
		       device_id);
		ret = PTR_ERR(zram->disk);
		goto out_free_idr;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->minors = 1;
	zram->disk->flags |= GENHD_FL_NO_PART;
	zram->disk->fops = &zram_devops;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
	atomic_set(&zram->pp_in_progress, 0);
	zram_comp_params_reset(zram);
	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);

	/* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
	if (ret)
		goto out_cleanup_disk;

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_cleanup_disk:
	put_disk(zram->disk);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}
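/*
 * A successfully added device appears as /dev/zram<id> (and
 * /sys/block/zram<id>/) with a capacity of zero; it becomes usable only
 * after a size is written to its disksize attribute, as shown in the
 * example above.
 */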
static int zram_remove(struct zram *zram)
{
	bool claimed;

	mutex_lock(&zram->disk->open_mutex);
	if (disk_openers(zram->disk)) {
		mutex_unlock(&zram->disk->open_mutex);
		return -EBUSY;
	}

	claimed = zram->claim;
	if (!claimed)
		zram->claim = true;
	mutex_unlock(&zram->disk->open_mutex);

	zram_debugfs_unregister(zram);

	if (claimed) {
		/*
		 * If we were claimed by reset_store(), del_gendisk() will
		 * wait until reset_store() is done, so there is nothing
		 * to do here.
		 */
		;
	} else {
		/* Make sure all the pending I/O is finished */
		sync_blockdev(zram->disk->part0);
		zram_reset_device(zram);
	}

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);

	/* del_gendisk() drains any pending reset_store() */
	WARN_ON_ONCE(claimed && zram->claim);

	/*
	 * disksize_store() may be called in between zram_reset_device()
	 * and del_gendisk(), so run the last reset to avoid leaking
	 * anything allocated with disksize_store().
	 */
	zram_reset_device(zram);

	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add is not the usual read-only sysfs attribute, in the sense
 * that reading from this file does alter the state of your system -- it
 * creates a new uninitialized zram device and returns that device's
 * device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(const struct class *class,
			    const struct class_attribute *attr,
			    char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);

static ssize_t hot_remove_store(const struct class *class,
				const struct class_attribute *attr,
				const char *buf,
				size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);
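/*
 * Example usage of the zram-control class attributes (illustrative; the
 * device id 4 is a placeholder):
 *
 *   cat /sys/class/zram-control/hot_add       # prints the new device id
 *   echo 4 > /sys/class/zram-control/hot_remove
 *
 * hot_remove fails with -EBUSY while the target device is still open.
 */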
static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name = "zram-control",
	.class_groups = zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	WARN_ON_ONCE(zram_remove(ptr));
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	struct zram_table_entry zram_te;
	int ret;

	BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
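/*
 * Example (illustrative): pre-create four devices at module load time:
 *
 *   modprobe zram num_devices=4
 *
 * Additional devices can still be added or removed later through
 * /sys/class/zram-control/hot_add and hot_remove, as shown above.
 */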