/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>
#include <linux/kernel_read_file.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;

static void zram_free_page(struct zram *zram, size_t index);
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent);

static int zram_slot_trylock(struct zram *zram, u32 index)
{
	return spin_trylock(&zram->table[index].lock);
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
	spin_lock(&zram->table[index].lock);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	spin_unlock(&zram->table[index].lock);
}

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

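/*
 * A rough sketch of the (assumed) table[index].flags layout, as implied by
 * the accessors above and the size helpers below: the low ZRAM_FLAG_SHIFT
 * bits hold the compressed object size, and the bits above hold the ZRAM_*
 * page flags and the compression priority, i.e.:
 *
 *	size = flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
 *	bits = flags >> ZRAM_FLAG_SHIFT;
 */
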
static void zram_set_obj_size(struct zram *zram,
			u32 index, size_t size)
{
	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME) ||
			zram_test_flag(zram, index, ZRAM_WB);
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#define ZRAM_PARTIAL_IO		1
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Clear the previous priority value first, in case we recompress
	 * an already recompressed page further.
	 */
	zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
				      ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}

static inline u32 zram_get_priority(struct zram *zram, u32 index)
{
	u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;

	return prio & ZRAM_COMP_PRIORITY_MASK;
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram_clear_flag(zram, index, ZRAM_PP_SLOT);
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].ac_time = ktime_get_boottime();
#endif
}

#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
struct zram_pp_slot {
	unsigned long index;
	struct list_head entry;
};

/*
 * A post-processing bucket is, essentially, a size class; this defines
 * the range (in bytes) of pp-slot sizes in a particular bucket.
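 *
 * For example (a sketch, assuming PAGE_SIZE == 4096): objects of size
 * 0..63 bytes land in bucket 0, 64..127 in bucket 1, and so on, with
 * full PAGE_SIZE (huge) objects ending up in the last bucket.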
 */
#define PP_BUCKET_SIZE_RANGE	64
#define NUM_PP_BUCKETS		((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)

struct zram_pp_ctl {
	struct list_head pp_buckets[NUM_PP_BUCKETS];
};

static struct zram_pp_ctl *init_pp_ctl(void)
{
	struct zram_pp_ctl *ctl;
	u32 idx;

	ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
	if (!ctl)
		return NULL;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
		INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
	return ctl;
}

static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
{
	list_del_init(&pps->entry);

	zram_slot_lock(zram, pps->index);
	zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
	zram_slot_unlock(zram, pps->index);

	kfree(pps);
}

static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
{
	u32 idx;

	if (!ctl)
		return;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
		while (!list_empty(&ctl->pp_buckets[idx])) {
			struct zram_pp_slot *pps;

			pps = list_first_entry(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
			release_pp_slot(zram, pps);
		}
	}

	kfree(ctl);
}

static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
			  struct zram_pp_slot *pps)
{
	u32 idx;

	idx = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
	list_add(&pps->entry, &ctl->pp_buckets[idx]);

	zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
}

static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
{
	struct zram_pp_slot *pps = NULL;
	s32 idx = NUM_PP_BUCKETS - 1;

	/* The higher the bucket id the more optimal slot post-processing is */
	while (idx >= 0) {
		pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
		if (pps)
			break;

		idx--;
	}
	return pps;
}
#endif

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		if (cur_max >= pages)
			return;
	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
					  &cur_max, pages));
}

static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * Mark all pages which are older than or equal to cutoff as IDLE.
 * Callers should hold the zram init lock in read mode.
 */
static void mark_idle(struct zram *zram, ktime_t cutoff)
{
	int is_idle = 1;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
		 * post-processing (recompress, writeback) happens to the
		 * ZRAM_SAME slot.
		 *
		 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
		    zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME)) {
			zram_slot_unlock(zram, index);
			continue;
		}

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
		is_idle = !cutoff ||
			ktime_after(cutoff, zram->table[index].ac_time);
#endif
		if (is_idle)
			zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
	}
}

static ssize_t idle_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	ktime_t cutoff_time = 0;
	ssize_t rv = -EINVAL;

	if (!sysfs_streq(buf, "all")) {
		/*
		 * If it did not parse as 'all' try to treat it as an integer
		 * when we have memory tracking enabled.
		 */
		u64 age_sec;

		if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) &&
		    !kstrtoull(buf, 0, &age_sec))
			cutoff_time = ktime_sub(ktime_get_boottime(),
					ns_to_ktime(age_sec * NSEC_PER_SEC));
		else
			goto out;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram))
		goto out_unlock;

	/*
	 * A cutoff_time of 0 marks everything as idle, this is the
	 * "all" behavior.
	 */
	mark_idle(zram, cutoff_time);
	rv = len;

out_unlock:
	up_read(&zram->init_lock);
out:
	return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_enable_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}

static void reset_bdev(struct zram *zram)
{
	if (!zram->backing_dev)
		return;

	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}

static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strscpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	inode = backing_dev->f_mapping->host;

	/* Only block devices are supported at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = I_BDEV(inode);
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}

static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0 so a valid block index is never confused with zram.handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}

static void read_from_bdev_async(struct zram *zram, struct page *page,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_chain(bio, parent);
	submit_bio(bio);
}

#define PAGE_WB_SIG "page_index="

#define PAGE_WRITEBACK			0
#define HUGE_WRITEBACK			(1<<0)
#define IDLE_WRITEBACK			(1<<1)
#define INCOMPRESSIBLE_WRITEBACK	(1<<2)

static int scan_slots_for_writeback(struct zram *zram, u32 mode,
				    unsigned long nr_pages,
				    unsigned long index,
				    struct zram_pp_ctl *ctl)
{
	struct zram_pp_slot *pps = NULL;

	for (; nr_pages != 0; index++, nr_pages--) {
		if (!pps)
			pps = kmalloc(sizeof(*pps), GFP_KERNEL);
		if (!pps)
			return -ENOMEM;

		INIT_LIST_HEAD(&pps->entry);

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME))
			goto next;

		if (mode & IDLE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode & HUGE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;
		if (mode & INCOMPRESSIBLE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		pps->index = index;
		place_pp_slot(zram, ctl, pps);
		pps = NULL;
next:
		zram_slot_unlock(zram, index);
	}

	kfree(pps);
	return 0;
}

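/*
 * Writeback is triggered from user space via the "writeback" sysfs
 * attribute. A minimal usage sketch (assuming the device is zram0 and a
 * backing device has been configured):
 *
 *	echo idle > /sys/block/zram0/writeback
 *	echo huge_idle > /sys/block/zram0/writeback
 *	echo "page_index=42" > /sys/block/zram0/writeback
 *
 * The accepted keywords correspond to the modes parsed below.
 */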
static ssize_t writeback_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct zram_pp_ctl *ctl = NULL;
	struct zram_pp_slot *pps;
	unsigned long index = 0;
	struct bio bio;
	struct bio_vec bio_vec;
	struct page *page;
	ssize_t ret = len;
	int mode, err;
	unsigned long blk_idx = 0;

	if (sysfs_streq(buf, "idle"))
		mode = IDLE_WRITEBACK;
	else if (sysfs_streq(buf, "huge"))
		mode = HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "huge_idle"))
		mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "incompressible"))
		mode = INCOMPRESSIBLE_WRITEBACK;
	else {
		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
			return -EINVAL;

		if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
		    index >= nr_pages)
			return -EINVAL;

		nr_pages = 1;
		mode = PAGE_WRITEBACK;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	/* Do not permit concurrent post-processing actions. */
	if (atomic_xchg(&zram->pp_in_progress, 1)) {
		up_read(&zram->init_lock);
		return -EAGAIN;
	}

	if (!zram->backing_dev) {
		ret = -ENODEV;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	ctl = init_pp_ctl();
	if (!ctl) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	scan_slots_for_writeback(zram, mode, nr_pages, index, ctl);

	while ((pps = select_pp_slot(ctl))) {
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			spin_unlock(&zram->wb_limit_lock);
			ret = -EIO;
			break;
		}
		spin_unlock(&zram->wb_limit_lock);

		if (!blk_idx) {
			blk_idx = alloc_block_bdev(zram);
			if (!blk_idx) {
				ret = -ENOSPC;
				break;
			}
		}

		index = pps->index;
		zram_slot_lock(zram, index);
		/*
		 * scan_slots() sets ZRAM_PP_SLOT and releases the slot lock,
		 * so slots can change in the meantime. If slots are accessed
		 * or freed they lose the ZRAM_PP_SLOT flag and hence we don't
		 * post-process them.
		 */
		if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
			goto next;
		zram_slot_unlock(zram, index);

		if (zram_read_page(zram, page, index, NULL)) {
			release_pp_slot(zram, pps);
			continue;
		}

		bio_init(&bio, zram->bdev, &bio_vec, 1,
			 REQ_OP_WRITE | REQ_SYNC);
		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
		__bio_add_page(&bio, page, PAGE_SIZE, 0);

		/*
		 * XXX: A single page IO would be inefficient for write
		 * but it would not be bad as a starter.
		 */
		err = submit_bio_wait(&bio);
		if (err) {
			release_pp_slot(zram, pps);
			/*
			 * BIO errors are not fatal, we continue and simply
			 * attempt to writeback the remaining objects (pages).
			 * At the same time we need to signal user-space that
			 * some writes (at least one, but also could be all of
			 * them) were not successful and we do so by returning
			 * the most recent BIO error.
			 */
			ret = err;
			continue;
		}

		atomic64_inc(&zram->stats.bd_writes);
		zram_slot_lock(zram, index);
		/*
		 * Same as above, we release the slot lock during writeback so
		 * the slot can change under us: slot_free(), or slot_free()
		 * followed by reallocation (zram_write_page()). In both cases
		 * the slot loses the ZRAM_PP_SLOT flag.
		 * No concurrent post-processing can set ZRAM_PP_SLOT on such
		 * slots until the current post-processing finishes.
		 */
		if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
			goto next;

		zram_free_page(zram, index);
		zram_set_flag(zram, index, ZRAM_WB);
		zram_set_element(zram, index, blk_idx);
		blk_idx = 0;
		atomic64_inc(&zram->stats.pages_stored);
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
			zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
		spin_unlock(&zram->wb_limit_lock);
next:
		zram_slot_unlock(zram, index);
		release_pp_slot(zram, pps);
	}

	if (blk_idx)
		free_block_bdev(zram, blk_idx);
	__free_page(page);
release_init_lock:
	release_pp_ctl(zram, ctl);
	atomic_set(&zram->pp_in_progress, 0);
	up_read(&zram->init_lock);

	return ret;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct page *page;
	int error;
};

static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
	__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
	zw->error = submit_bio_wait(&bio);
}

/*
 * The block layer wants one ->submit_bio to be active at a time, so if we use
 * chained IO with the parent IO in the same context, it's a deadlock. To
 * avoid that, use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct page *page,
				unsigned long entry)
{
	struct zram_work work;

	work.page = page;
	work.zram = zram;
	work.entry = entry;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return work.error;
}

static int read_from_bdev(struct zram *zram, struct page *page,
			unsigned long entry, struct bio *parent)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (!parent) {
		if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
			return -EIO;
		return read_from_bdev_sync(zram, page, entry);
	}
	read_from_bdev_async(zram, page, entry, parent);
	return 0;
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct page *page,
			unsigned long entry, struct bio *parent)
{
	return -EIO;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c%c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
			zram_get_priority(zram, index) ? 'r' : '.',
			zram_test_flag(zram, index,
				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');

		if (count <= copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	/* Do not free statically defined compression algorithms */
	if (zram->comp_algs[prio] != default_compressor)
		kfree(zram->comp_algs[prio]);

	zram->comp_algs[prio] = alg;
}

static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
{
	ssize_t sz;

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->comp_algs[prio], buf);
	up_read(&zram->init_lock);

	return sz;
}

static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	char *compressor;
	size_t sz;

	sz = strlen(buf);
	if (sz >= CRYPTO_MAX_ALG_NAME)
		return -E2BIG;

	compressor = kstrdup(buf, GFP_KERNEL);
	if (!compressor)
		return -ENOMEM;

	/* ignore trailing newline */
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor)) {
		kfree(compressor);
		return -EINVAL;
	}

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		kfree(compressor);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, compressor);
	up_write(&zram->init_lock);
	return 0;
}

static void comp_params_reset(struct zram *zram, u32 prio)
{
	struct zcomp_params *params = &zram->params[prio];

	vfree(params->dict);
	params->level = ZCOMP_PARAM_NO_LEVEL;
	params->dict_sz = 0;
	params->dict = NULL;
}

static int comp_params_store(struct zram *zram, u32 prio, s32 level,
			     const char *dict_path)
{
	ssize_t sz = 0;

	comp_params_reset(zram, prio);

	if (dict_path) {
		sz = kernel_read_file_from_path(dict_path, 0,
						&zram->params[prio].dict,
						INT_MAX,
						NULL,
						READING_POLICY);
		if (sz < 0)
			return -EINVAL;
	}

	zram->params[prio].dict_sz = sz;
	zram->params[prio].level = level;
	return 0;
}

static ssize_t algorithm_params_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NO_LEVEL;
	char *args, *param, *val, *algo = NULL, *dict_path = NULL;
	struct zram *zram = dev_to_zram(dev);
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "level")) {
			ret = kstrtoint(val, 10, &level);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "dict")) {
			dict_path = val;
			continue;
		}
	}

	/* Lookup priority by algorithm name */
	if (algo) {
		s32 p;

		prio = -EINVAL;
		for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) {
			if (!zram->comp_algs[p])
				continue;

			if (!strcmp(zram->comp_algs[p], algo)) {
				prio = p;
				break;
			}
		}
	}

	if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = comp_params_store(zram, prio, level, dict_path);
	return ret ? ret : len;
}

static ssize_t comp_algorithm_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
}

static ssize_t comp_algorithm_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf,
				    size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
	return ret ? ret : len;
}

#ifdef CONFIG_ZRAM_MULTI_COMP
static ssize_t recomp_algorithm_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz = 0;
	u32 prio;

	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
		sz += __comp_algorithm_show(zram, prio, buf + sz);
	}

	return sz;
}

static ssize_t recomp_algorithm_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int prio = ZRAM_SECONDARY_COMP;
	char *args, *param, *val;
	char *alg = NULL;
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "algo")) {
			alg = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}
	}

	if (!alg)
		return -EINVAL;

	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = __comp_algorithm_store(zram, prio, alg);
	return ret ? ret : len;
}
#endif

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu 0 %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			atomic_long_read(&pool_stats.pages_compacted),
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));
	up_read(&zram->init_lock);

	return ret;
}

#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu\n",
			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
	up_read(&zram->init_lock);

	return ret;
}
#endif

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall),
			(u64)atomic64_read(&zram->stats.miss_free));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
#endif
static DEVICE_ATTR_RO(debug_stat);

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages, index;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);

	for (index = 0; index < num_pages; index++)
		spin_lock_init(&zram->table[index].lock);
	return true;
}

/*
 * To protect concurrent access to the same index entry, the caller
 * should hold this table index entry's bit_spinlock to indicate that
 * this index entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].ac_time = 0;
#endif

	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
	zram_clear_flag(zram, index, ZRAM_PP_SLOT);
	zram_set_priority(zram, index, 0);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		free_block_bdev(zram, zram_get_element(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
		     &zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}

/*
 * Reads (decompresses if needed) a page from zspool (zsmalloc).
 * Corresponding ZRAM slot should be locked.
 */
static int zram_read_from_zspool(struct zram *zram, struct page *page,
				 u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	u32 prio;
	int ret;

	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_local_page(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_local(mem);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	if (size != PAGE_SIZE) {
		prio = zram_get_priority(zram, index);
		zstrm = zcomp_stream_get(zram->comps[prio]);
	}

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_local_page(page);
		copy_page(dst, src);
		kunmap_local(dst);
		ret = 0;
	} else {
		dst = kmap_local_page(page);
		ret = zcomp_decompress(zram->comps[prio], zstrm,
				       src, size, dst);
		kunmap_local(dst);
		zcomp_stream_put(zram->comps[prio]);
	}
	zs_unmap_object(zram->mem_pool, handle);
	return ret;
}

static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	zram_slot_lock(zram, index);
	if (!zram_test_flag(zram, index, ZRAM_WB)) {
		/* The slot should be locked throughout the function call */
		ret = zram_read_from_zspool(zram, page, index);
		zram_slot_unlock(zram, index);
	} else {
		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		zram_slot_unlock(zram, index);

		ret = read_from_bdev(zram, page, zram_get_element(zram, index),
				     parent);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

/*
 * Use a temporary buffer to decompress the page, as the decompressor
 * always expects a full page for the output.
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;
	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_read_partial(zram, bvec, index, offset);
	return zram_read_page(zram, bvec->bv_page, index, bio);
}

static int zram_write_page(struct zram *zram, struct page *page, u32 index)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = -ENOMEM;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;

	mem = kmap_local_page(page);
	if (page_same_filled(mem, &element)) {
		kunmap_local(mem);
		/* Free memory associated with this sector now. */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_local(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	src = kmap_local_page(page);
	ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
			     src, &comp_len);
	kunmap_local(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
		pr_err("Compression failed! err=%d\n", ret);
err=%d\n", ret); 1659 zs_free(zram->mem_pool, handle); 1660 return ret; 1661 } 1662 1663 if (comp_len >= huge_class_size) 1664 comp_len = PAGE_SIZE; 1665 /* 1666 * handle allocation has 2 paths: 1667 * a) fast path is executed with preemption disabled (for 1668 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, 1669 * since we can't sleep; 1670 * b) slow path enables preemption and attempts to allocate 1671 * the page with __GFP_DIRECT_RECLAIM bit set. we have to 1672 * put per-cpu compression stream and, thus, to re-do 1673 * the compression once handle is allocated. 1674 * 1675 * if we have a 'non-null' handle here then we are coming 1676 * from the slow path and handle has already been allocated. 1677 */ 1678 if (IS_ERR_VALUE(handle)) 1679 handle = zs_malloc(zram->mem_pool, comp_len, 1680 __GFP_KSWAPD_RECLAIM | 1681 __GFP_NOWARN | 1682 __GFP_HIGHMEM | 1683 __GFP_MOVABLE); 1684 if (IS_ERR_VALUE(handle)) { 1685 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); 1686 atomic64_inc(&zram->stats.writestall); 1687 handle = zs_malloc(zram->mem_pool, comp_len, 1688 GFP_NOIO | __GFP_HIGHMEM | 1689 __GFP_MOVABLE); 1690 if (IS_ERR_VALUE(handle)) 1691 return PTR_ERR((void *)handle); 1692 1693 if (comp_len != PAGE_SIZE) 1694 goto compress_again; 1695 /* 1696 * If the page is not compressible, you need to acquire the 1697 * lock and execute the code below. The zcomp_stream_get() 1698 * call is needed to disable the cpu hotplug and grab the 1699 * zstrm buffer back. It is necessary that the dereferencing 1700 * of the zstrm variable below occurs correctly. 1701 */ 1702 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); 1703 } 1704 1705 alloced_pages = zs_get_total_pages(zram->mem_pool); 1706 update_used_max(zram, alloced_pages); 1707 1708 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 1709 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); 1710 zs_free(zram->mem_pool, handle); 1711 return -ENOMEM; 1712 } 1713 1714 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); 1715 1716 src = zstrm->buffer; 1717 if (comp_len == PAGE_SIZE) 1718 src = kmap_local_page(page); 1719 memcpy(dst, src, comp_len); 1720 if (comp_len == PAGE_SIZE) 1721 kunmap_local(src); 1722 1723 zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); 1724 zs_unmap_object(zram->mem_pool, handle); 1725 atomic64_add(comp_len, &zram->stats.compr_data_size); 1726 out: 1727 /* 1728 * Free memory associated with this sector 1729 * before overwriting unused sectors. 1730 */ 1731 zram_slot_lock(zram, index); 1732 zram_free_page(zram, index); 1733 1734 if (comp_len == PAGE_SIZE) { 1735 zram_set_flag(zram, index, ZRAM_HUGE); 1736 atomic64_inc(&zram->stats.huge_pages); 1737 atomic64_inc(&zram->stats.huge_pages_since); 1738 } 1739 1740 if (flags) { 1741 zram_set_flag(zram, index, flags); 1742 zram_set_element(zram, index, element); 1743 } else { 1744 zram_set_handle(zram, index, handle); 1745 zram_set_obj_size(zram, index, comp_len); 1746 } 1747 zram_slot_unlock(zram, index); 1748 1749 /* Update stats */ 1750 atomic64_inc(&zram->stats.pages_stored); 1751 return ret; 1752 } 1753 1754 /* 1755 * This is a partial IO. Read the full page before writing the changes. 
 */
static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
				   u32 index, int offset, struct bio *bio)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = zram_read_page(zram, page, index, bio);
	if (!ret) {
		memcpy_from_bvec(page_address(page) + offset, bvec);
		ret = zram_write_page(zram, page, index);
	}
	__free_page(page);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
			   u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_write_partial(zram, bvec, index, offset, bio);
	return zram_write_page(zram, bvec->bv_page, index);
}

#ifdef CONFIG_ZRAM_MULTI_COMP
#define RECOMPRESS_IDLE		(1 << 0)
#define RECOMPRESS_HUGE		(1 << 1)

static int scan_slots_for_recompress(struct zram *zram, u32 mode,
				     struct zram_pp_ctl *ctl)
{
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct zram_pp_slot *pps = NULL;
	unsigned long index;

	for (index = 0; index < nr_pages; index++) {
		if (!pps)
			pps = kmalloc(sizeof(*pps), GFP_KERNEL);
		if (!pps)
			return -ENOMEM;

		INIT_LIST_HEAD(&pps->entry);

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (mode & RECOMPRESS_IDLE &&
		    !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;

		if (mode & RECOMPRESS_HUGE &&
		    !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME) ||
		    zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		pps->index = index;
		place_pp_slot(zram, ctl, pps);
		pps = NULL;
next:
		zram_slot_unlock(zram, index);
	}

	kfree(pps);
	return 0;
}

/*
 * This function will decompress (unless it's ZRAM_HUGE) the page and then
 * attempt to compress it using the provided compression algorithm priority
 * (which is potentially more effective).
 *
 * Corresponding ZRAM slot should be locked.
 */
static int recompress_slot(struct zram *zram, u32 index, struct page *page,
			   u64 *num_recomp_pages, u32 threshold, u32 prio,
			   u32 prio_max)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	u32 num_recomps = 0;
	void *src, *dst;
	int ret;

	handle_old = zram_get_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = zram_get_obj_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = zram_read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
	/*
	 * Iterate the secondary comp algorithms list (in order of priority)
	 * and try to recompress the page.
	 */
	for (; prio < prio_max; prio++) {
		if (!zram->comps[prio])
			continue;

		/*
		 * Skip if the object is already re-compressed with a higher
		 * priority algorithm (or same algorithm).
		 */
		if (prio <= zram_get_priority(zram, index))
			continue;

		num_recomps++;
		zstrm = zcomp_stream_get(zram->comps[prio]);
		src = kmap_local_page(page);
		ret = zcomp_compress(zram->comps[prio], zstrm,
				     src, &comp_len_new);
		kunmap_local(src);

		if (ret) {
			zcomp_stream_put(zram->comps[prio]);
			return ret;
		}

		class_index_new = zs_lookup_class_index(zram->mem_pool,
							comp_len_new);

		/* Continue until we make progress */
		if (class_index_new >= class_index_old ||
		    (threshold && comp_len_new >= threshold)) {
			zcomp_stream_put(zram->comps[prio]);
			continue;
		}

		/* Recompression was successful so break out */
		break;
	}

	/*
	 * We did not try to recompress, e.g. when we have only one
	 * secondary algorithm and the page is already recompressed
	 * using that algorithm.
	 */
	if (!zstrm)
		return 0;

	/*
	 * Decrement the limit (if set) on pages we can recompress, even
	 * when the current recompression was unsuccessful or did not
	 * compress the page below the threshold, because we still spent
	 * resources on it.
	 */
	if (*num_recomp_pages)
		*num_recomp_pages -= 1;

	if (class_index_new >= class_index_old) {
		/*
		 * Secondary algorithms failed to re-compress the page
		 * in a way that would save memory, mark the object as
		 * incompressible so that we will not try to compress
		 * it again.
		 *
		 * We need to make sure that all secondary algorithms have
		 * failed, so we test if the number of recompressions matches
		 * the number of active secondary algorithms.
		 */
		if (num_recomps == zram->num_active_comps - 1)
			zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/* Successful recompression but above threshold */
	if (threshold && comp_len_new >= threshold)
		return 0;

	/*
	 * No direct reclaim (slow path) for handle allocation and no
	 * re-compression attempt (unlike in zram_write_bvec()) since
	 * we already have stored that object in zsmalloc. If we cannot
	 * alloc memory for recompressed object then we bail out and
	 * simply keep the old (existing) object in zsmalloc.
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       __GFP_KSWAPD_RECLAIM |
			       __GFP_NOWARN |
			       __GFP_HIGHMEM |
			       __GFP_MOVABLE);
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zram->comps[prio]);
		return PTR_ERR((void *)handle_new);
	}

	dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
	memcpy(dst, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zram->comps[prio]);

	zs_unmap_object(zram->mem_pool, handle_new);

	zram_free_page(zram, index);
	zram_set_handle(zram, index, handle_new);
	zram_set_obj_size(zram, index, comp_len_new);
	zram_set_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

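/*
 * Recompression is requested from user space via the "recompress" sysfs
 * attribute. A minimal usage sketch (assuming the device is zram0 and a
 * secondary algorithm has been registered via recomp_algorithm):
 *
 *	echo "type=idle" > /sys/block/zram0/recompress
 *	echo "type=huge_idle max_pages=1000" > /sys/block/zram0/recompress
 *
 * The accepted key=value pairs correspond to the parameters parsed below.
 */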
static ssize_t recompress_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
	struct zram *zram = dev_to_zram(dev);
	char *args, *param, *val, *algo = NULL;
	u64 num_recomp_pages = ULLONG_MAX;
	struct zram_pp_ctl *ctl = NULL;
	struct zram_pp_slot *pps;
	u32 mode = 0, threshold = 0;
	struct page *page;
	ssize_t ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "type")) {
			if (!strcmp(val, "idle"))
				mode = RECOMPRESS_IDLE;
			if (!strcmp(val, "huge"))
				mode = RECOMPRESS_HUGE;
			if (!strcmp(val, "huge_idle"))
				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
			continue;
		}

		if (!strcmp(param, "max_pages")) {
			/*
			 * Limit the number of entries (pages) we attempt to
			 * recompress.
			 */
			ret = kstrtoull(val, 10, &num_recomp_pages);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "threshold")) {
			/*
			 * We will re-compress only idle objects equal to or
			 * greater in size than the watermark.
			 */
			ret = kstrtouint(val, 10, &threshold);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtouint(val, 10, &prio);
			if (ret)
				return ret;

			if (prio == ZRAM_PRIMARY_COMP)
				prio = ZRAM_SECONDARY_COMP;

			prio_max = min(prio + 1, ZRAM_MAX_COMPS);
			continue;
		}
	}

	if (threshold >= huge_class_size)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	/* Do not permit concurrent post-processing actions. */
	if (atomic_xchg(&zram->pp_in_progress, 1)) {
		up_read(&zram->init_lock);
		return -EAGAIN;
	}

	if (algo) {
		bool found = false;

		for (; prio < ZRAM_MAX_COMPS; prio++) {
			if (!zram->comp_algs[prio])
				continue;

			if (!strcmp(zram->comp_algs[prio], algo)) {
				prio_max = min(prio + 1, ZRAM_MAX_COMPS);
				found = true;
				break;
			}
		}

		if (!found) {
			ret = -EINVAL;
			goto release_init_lock;
		}
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	ctl = init_pp_ctl();
	if (!ctl) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	scan_slots_for_recompress(zram, mode, ctl);

	ret = len;
	while ((pps = select_pp_slot(ctl))) {
		int err = 0;

		if (!num_recomp_pages)
			break;

		zram_slot_lock(zram, pps->index);
		if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
			goto next;

		err = recompress_slot(zram, pps->index, page,
				      &num_recomp_pages, threshold,
				      prio, prio_max);
next:
		zram_slot_unlock(zram, pps->index);
		release_pp_slot(zram, pps);

		if (err) {
			ret = err;
			break;
		}

		cond_resched();
	}

	__free_page(page);

release_init_lock:
	release_pp_ctl(zram, ctl);
	atomic_set(&zram->pp_in_progress, 0);
	up_read(&zram->init_lock);
	return ret;
}
#endif

static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;

	/*
	 * zram manages data in physical block size units. Because the logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we can
	 * handle this request by reading that physical block and
	 * decompressing and partially zeroing and re-compressing and then
	 * re-storing it, this isn't reasonable because our intent with a
	 * discard request is to save memory. So skipping this logical block
	 * is appropriate here.

static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;

	/*
	 * zram manages data in physical-block-size units. Because the logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a
	 * specific offset within a physical block. Although we could handle
	 * such a request by reading that physical block, decompressing it,
	 * partially zeroing it, and then re-compressing and re-storing it,
	 * this isn't reasonable because our intent with a discard request is
	 * to save memory. So skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}

	bio_endio(bio);
}
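
/*
 * Both the read and write paths below split a bio into segments of at most
 * PAGE_SIZE - offset bytes, so each iteration operates on exactly one zram
 * slot. The slot lock is taken only to update access metadata once the
 * segment has been read or written.
 */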
static void zram_bio_read(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_reads);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}
		flush_dcache_page(bv.bv_page);

		zram_slot_lock(zram, index);
		zram_accessed(zram, index);
		zram_slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

static void zram_bio_write(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_writes);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}

		zram_slot_lock(zram, index);
		zram_accessed(zram, index);
		zram_slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static void zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_bdev->bd_disk->private_data;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		zram_bio_read(zram, bio);
		break;
	case REQ_OP_WRITE:
		zram_bio_write(zram, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, bio);
		break;
	default:
		WARN_ON_ONCE(1);
		bio_endio(bio);
	}
}

static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}

static void zram_comp_params_reset(struct zram *zram)
{
	u32 prio;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		comp_params_reset(zram, prio);
	}
}

static void zram_destroy_comps(struct zram *zram)
{
	u32 prio;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		struct zcomp *comp = zram->comps[prio];

		zram->comps[prio] = NULL;
		if (!comp)
			continue;
		zcomp_destroy(comp);
		zram->num_active_comps--;
	}

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		/* Do not free statically defined compression algorithms */
		if (zram->comp_algs[prio] != default_compressor)
			kfree(zram->comp_algs[prio]);
		zram->comp_algs[prio] = NULL;
	}

	zram_comp_params_reset(zram);
}

static void zram_reset_device(struct zram *zram)
{
	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	/* All I/O on all CPUs has completed, so it is safe to free */
	zram_meta_free(zram, zram->disksize);
	zram->disksize = 0;
	zram_destroy_comps(zram);
	memset(&zram->stats, 0, sizeof(zram->stats));
	atomic_set(&zram->pp_in_progress, 0);
	reset_bdev(zram);

	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
	up_write(&zram->init_lock);
}
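
/*
 * Typical device setup from userspace (see
 * Documentation/admin-guide/blockdev/zram.rst):
 *
 *	echo 1G > /sys/block/zram0/disksize
 *	mkswap /dev/zram0 && swapon /dev/zram0
 *
 * and tear-down:
 *
 *	swapoff /dev/zram0
 *	echo 1 > /sys/block/zram0/reset
 */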
static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;
	u32 prio;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		comp = zcomp_create(zram->comp_algs[prio],
				    &zram->params[prio]);
		if (IS_ERR(comp)) {
			pr_err("Cannot initialise %s compressing backend\n",
			       zram->comp_algs[prio]);
			err = PTR_ERR(comp);
			goto out_free_comps;
		}

		zram->comps[prio] = comp;
		zram->num_active_comps++;
	}
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	return len;

out_free_comps:
	zram_destroy_comps(zram);
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct gendisk *disk;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	disk = zram->disk;

	mutex_lock(&disk->open_mutex);
	/* Do not reset an active or claimed device */
	if (disk_openers(disk) || zram->claim) {
		mutex_unlock(&disk->open_mutex);
		return -EBUSY;
	}

	/* From now on, nobody can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&disk->open_mutex);

	/* Make sure all pending I/O is finished */
	sync_blockdev(disk->part0);
	zram_reset_device(zram);

	mutex_lock(&disk->open_mutex);
	zram->claim = false;
	mutex_unlock(&disk->open_mutex);

	return len;
}

static int zram_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zram *zram = disk->private_data;

	WARN_ON(!mutex_is_locked(&disk->open_mutex));

	/* zram was claimed for reset, so the open request fails */
	if (zram->claim)
		return -EBUSY;
	return 0;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
static DEVICE_ATTR_WO(recompress);
#endif
static DEVICE_ATTR_WO(algorithm_params);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
	&dev_attr_recomp_algorithm.attr,
	&dev_attr_recompress.attr,
#endif
	&dev_attr_algorithm_params.attr,
	NULL,
};

ATTRIBUTE_GROUPS(zram_disk);
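
/*
 * The attributes above appear per device under /sys/block/zram<id>/.
 * For example, reading memory-usage statistics:
 *
 *	cat /sys/block/zram0/mm_stat
 *
 * See Documentation/admin-guide/blockdev/zram.rst for the meaning of
 * each column.
 */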

/*
 * Allocate and initialize a new zram device. Returns a device_id ('>= 0')
 * on success, or a negative value otherwise.
 */
static int zram_add(void)
{
	struct queue_limits lim = {
		.logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
		/*
		 * Ensure that we always get PAGE_SIZE-aligned and
		 * n*PAGE_SIZE-sized I/O requests.
		 */
		.physical_block_size = PAGE_SIZE,
		.io_min = PAGE_SIZE,
		.io_opt = PAGE_SIZE,
		.max_hw_discard_sectors = UINT_MAX,
		/*
		 * zram_bio_discard() will clear all logical blocks if the
		 * logical block size is identical to the physical block size
		 * (PAGE_SIZE). But if they differ, we skip discarding the
		 * parts of the request range that are not aligned to the
		 * physical block size, so we can't guarantee that all
		 * discarded logical blocks are zeroed.
		 */
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
		.max_write_zeroes_sectors = UINT_MAX,
#endif
		.features = BLK_FEAT_STABLE_WRITES |
			    BLK_FEAT_SYNCHRONOUS,
	};
	struct zram *zram;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	spin_lock_init(&zram->wb_limit_lock);
#endif

	/* gendisk structure */
	zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
	if (IS_ERR(zram->disk)) {
		pr_err("Error allocating disk structure for device %d\n",
		       device_id);
		ret = PTR_ERR(zram->disk);
		goto out_free_idr;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->minors = 1;
	zram->disk->flags |= GENHD_FL_NO_PART;
	zram->disk->fops = &zram_devops;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
	atomic_set(&zram->pp_in_progress, 0);

	/* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
	if (ret)
		goto out_cleanup_disk;

	zram_comp_params_reset(zram);
	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_cleanup_disk:
	put_disk(zram->disk);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

static int zram_remove(struct zram *zram)
{
	bool claimed;

	mutex_lock(&zram->disk->open_mutex);
	if (disk_openers(zram->disk)) {
		mutex_unlock(&zram->disk->open_mutex);
		return -EBUSY;
	}

	claimed = zram->claim;
	if (!claimed)
		zram->claim = true;
	mutex_unlock(&zram->disk->open_mutex);

	zram_debugfs_unregister(zram);

	if (claimed) {
		/*
		 * If we were claimed by reset_store(), del_gendisk() will
		 * wait until reset_store() is done, so there is nothing to do.
		 */
		;
	} else {
		/* Make sure all pending I/O is finished */
		sync_blockdev(zram->disk->part0);
		zram_reset_device(zram);
	}

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);

	/* del_gendisk drains pending reset_store */
	WARN_ON_ONCE(claimed && zram->claim);

	/*
	 * disksize_store() may be called in between zram_reset_device()
	 * and del_gendisk(), so run the last reset to avoid leaking
	 * anything allocated with disksize_store()
	 */
	zram_reset_device(zram);

	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */
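
/*
 * Example (see Documentation/admin-guide/blockdev/zram.rst):
 *
 *	cat /sys/class/zram-control/hot_add
 *	4
 *
 * allocates a new device and prints its id, while
 *
 *	echo 4 > /sys/class/zram-control/hot_remove
 *
 * removes that device again.
 */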

/*
 * NOTE: the hot_add attribute is not the usual read-only sysfs attribute,
 * in the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns back
 * this device's device_id (or an error code if it fails to create a new
 * device).
 */
static ssize_t hot_add_show(const struct class *class,
			const struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);

static ssize_t hot_remove_store(const struct class *class,
			const struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name = "zram-control",
	.class_groups = zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	WARN_ON_ONCE(zram_remove(ptr));
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}
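
/*
 * Example module load (num_devices is the module parameter declared at the
 * end of this file):
 *
 *	modprobe zram num_devices=4
 *
 * pre-creates zram0..zram3; additional devices can be added at runtime via
 * /sys/class/zram-control/hot_add.
 */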
static int __init zram_init(void)
{
	struct zram_table_entry zram_te;
	int ret;

	BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");