// SPDX-License-Identifier: GPL-2.0-only
/*
 * z3fold.c
 *
 * Author: Vitaly Wool <vitaly.wool@konsulko.com>
 * Copyright (C) 2016, Sony Mobile Communications Inc.
 *
 * This implementation is based on zbud written by Seth Jennings.
 *
 * z3fold is a special purpose allocator for storing compressed pages. It
 * can store up to three compressed pages per page which improves the
 * compression ratio of zbud while retaining its main concepts (e.g. always
 * storing an integral number of objects per page) and simplicity.
 * It still has simple and deterministic reclaim properties that make it
 * preferable to a higher density approach (with no requirement on integral
 * number of objects per page) when reclaim is used.
 *
 * As in zbud, pages are divided into "chunks". The size of the chunks is
 * fixed at compile time and is determined by NCHUNKS_ORDER below.
 *
 * z3fold doesn't export any API and is meant to be used via zpool API.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/migrate.h>
#include <linux/node.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
#include <linux/kmemleak.h>

/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation. It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
 * at the beginning of an allocated page are occupied by the z3fold header, so
 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
 * which is the maximum number of free chunks in a z3fold page; there will
 * also be 63 (or 62, respectively) freelists per pool.
 */
#define NCHUNKS_ORDER	6

#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
#define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS		(TOTAL_CHUNKS - ZHDR_CHUNKS)

#define BUDDY_MASK	(0x3)
#define BUDDY_SHIFT	2
#define SLOTS_ALIGN	(0x40)
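/*
 * Worked example of the chunk arithmetic above (illustrative numbers only):
 * with PAGE_SIZE == 4096 and NCHUNKS_ORDER == 6, CHUNK_SHIFT == 6, so
 * CHUNK_SIZE == 64 bytes and TOTAL_CHUNKS == 64. ZHDR_CHUNKS is however
 * many 64-byte chunks the z3fold header needs (one in the typical
 * configuration described above, two with CONFIG_DEBUG_SPINLOCK=y),
 * leaving NCHUNKS == 63 or 62 chunks per page for data. The figures scale
 * with PAGE_SHIFT.
 */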
/*****************
 * Structures
 *****************/
struct z3fold_pool;
struct z3fold_ops {
	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
};

enum buddy {
	HEADLESS = 0,
	FIRST,
	MIDDLE,
	LAST,
	BUDDIES_MAX = LAST
};

struct z3fold_buddy_slots {
	/*
	 * We are using BUDDY_MASK in handle_to_buddy etc. so there should
	 * be enough slots to hold all possible variants.
	 */
	unsigned long slot[BUDDY_MASK + 1];
	unsigned long pool; /* back link */
	rwlock_t lock;
};
#define HANDLE_FLAG_MASK	(0x03)

/*
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page, except for HEADLESS pages
 * @buddy:		links the z3fold page into the relevant list in the
 *			pool
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @slots:		pointer to the structure holding buddy slots
 * @pool:		pointer to the containing pool
 * @cpu:		CPU which this page "belongs" to
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	index of the first chunk occupied by the middle buddy
 * @first_num:		the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 * @foreign_handles:	number of handles in other pages' slots pointing here
 */
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_buddy_slots *slots;
	struct z3fold_pool *pool;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
	unsigned short mapped_count:2;
	unsigned short foreign_handles:2;
};
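/*
 * Rough sketch of a non-headless z3fold page layout (sizes in chunks,
 * illustrative only):
 *
 *   0           ZHDR_CHUNKS             start_middle            TOTAL_CHUNKS
 *   | z3fold_header | FIRST | free ... | MIDDLE | free ... | LAST |
 *
 * The first buddy is placed right after the header, the last buddy is
 * placed against the end of the page, and the middle buddy floats at
 * start_middle (see z3fold_map() and z3fold_compact_page() below).
 */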
/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied/lru lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2
 *		or fewer buddies; the list each z3fold page is added to
 *		depends on the size of its free region.
 * @lru:	list tracking the z3fold pages in LRU order by most recently
 *		added buddy.
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool.
 * @c_handle:	cache for z3fold_buddy_slots allocation
 * @ops:	pointer to a structure of user defined operations specified at
 *		pool creation time.
 * @zpool:	zpool driver
 * @zpool_ops:	zpool operations structure with an evict callback
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @work:	work_struct for safe page release
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular z3fold pool.
 */
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head *unbuddied;
	struct list_head lru;
	struct list_head stale;
	atomic64_t pages_nr;
	struct kmem_cache *c_handle;
	const struct z3fold_ops *ops;
	struct zpool *zpool;
	const struct zpool_ops *zpool_ops;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct work_struct work;
};

/*
 * Internal z3fold page flags
 */
enum z3fold_page_flags {
	PAGE_HEADLESS = 0,
	MIDDLE_CHUNK_MAPPED,
	NEEDS_COMPACTING,
	PAGE_STALE,
	PAGE_CLAIMED, /* by either reclaim or free */
	PAGE_MIGRATED, /* page is migrated and soon to be released */
};

/*
 * handle flags, go under HANDLE_FLAG_MASK
 */
enum z3fold_handle_flags {
	HANDLES_NOFREE = 0,
};

/*
 * Forward declarations
 */
static struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool);
static void compact_page_work(struct work_struct *w);

/*****************
 * Helpers
 *****************/

/* Converts an allocation size in bytes to size in z3fold chunks */
static int size_to_chunks(size_t size)
{
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}

#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)

static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
							gfp_t gfp)
{
	struct z3fold_buddy_slots *slots = kmem_cache_zalloc(pool->c_handle,
							     gfp);

	if (slots) {
		/* It will be freed separately in free_handle(). */
		kmemleak_not_leak(slots);
		slots->pool = (unsigned long)pool;
		rwlock_init(&slots->lock);
	}

	return slots;
}

static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
{
	return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
}

static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
{
	return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
}

/* Lock a z3fold page */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}

/* Try to lock a z3fold page */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}

/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}

/* return locked z3fold page if it's not headless */
static inline struct z3fold_header *get_z3fold_header(unsigned long handle)
{
	struct z3fold_buddy_slots *slots;
	struct z3fold_header *zhdr;
	int locked = 0;

	if (!(handle & (1 << PAGE_HEADLESS))) {
		slots = handle_to_slots(handle);
		do {
			unsigned long addr;

			read_lock(&slots->lock);
			addr = *(unsigned long *)handle;
			zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
			locked = z3fold_page_trylock(zhdr);
			read_unlock(&slots->lock);
			if (locked) {
				struct page *page = virt_to_page(zhdr);

				if (!test_bit(PAGE_MIGRATED, &page->private))
					break;
				z3fold_page_unlock(zhdr);
			}
			cpu_relax();
		} while (true);
	} else {
		zhdr = (struct z3fold_header *)(handle & PAGE_MASK);
	}

	return zhdr;
}
static inline void put_z3fold_header(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (!test_bit(PAGE_HEADLESS, &page->private))
		z3fold_page_unlock(zhdr);
}

static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr)
{
	struct z3fold_buddy_slots *slots;
	int i;
	bool is_free;

	if (WARN_ON(*(unsigned long *)handle == 0))
		return;

	slots = handle_to_slots(handle);
	write_lock(&slots->lock);
	*(unsigned long *)handle = 0;

	if (test_bit(HANDLES_NOFREE, &slots->pool)) {
		write_unlock(&slots->lock);
		return; /* simple case, nothing else to do */
	}

	if (zhdr->slots != slots)
		zhdr->foreign_handles--;

	is_free = true;
	for (i = 0; i <= BUDDY_MASK; i++) {
		if (slots->slot[i]) {
			is_free = false;
			break;
		}
	}
	write_unlock(&slots->lock);

	if (is_free) {
		struct z3fold_pool *pool = slots_to_pool(slots);

		if (zhdr->slots == slots)
			zhdr->slots = NULL;
		kmem_cache_free(pool->c_handle, slots);
	}
}

/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
					struct z3fold_pool *pool, gfp_t gfp)
{
	struct z3fold_header *zhdr = page_address(page);
	struct z3fold_buddy_slots *slots;

	INIT_LIST_HEAD(&page->lru);
	clear_bit(PAGE_HEADLESS, &page->private);
	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	clear_bit(PAGE_STALE, &page->private);
	clear_bit(PAGE_CLAIMED, &page->private);
	clear_bit(PAGE_MIGRATED, &page->private);
	if (headless)
		return zhdr;

	slots = alloc_slots(pool, gfp);
	if (!slots)
		return NULL;

	memset(zhdr, 0, sizeof(*zhdr));
	spin_lock_init(&zhdr->page_lock);
	kref_init(&zhdr->refcount);
	zhdr->cpu = -1;
	zhdr->slots = slots;
	zhdr->pool = pool;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_WORK(&zhdr->work, compact_page_work);
	return zhdr;
}

/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	__free_page(page);
}

/* Helper function to build the index */
static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
{
	return (bud + zhdr->first_num) & BUDDY_MASK;
}

/*
 * Encodes the handle of a particular buddy within a z3fold page
 * Pool lock should be held as this function accesses first_num
 */
static unsigned long __encode_handle(struct z3fold_header *zhdr,
				struct z3fold_buddy_slots *slots,
				enum buddy bud)
{
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	if (bud == LAST)
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	write_lock(&slots->lock);
	slots->slot[idx] = h;
	write_unlock(&slots->lock);
	return (unsigned long)&slots->slot[idx];
}

static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	return __encode_handle(zhdr, zhdr->slots, bud);
}
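/*
 * Illustration of the handle encoding above (hypothetical addresses): a
 * handle is the address of a slot in struct z3fold_buddy_slots, and the
 * slot itself holds the z3fold page address plus the buddy index in the
 * low BUDDY_MASK bits, plus last_chunks << BUDDY_SHIFT for a LAST buddy.
 * E.g. for zhdr == 0xffff888012345000, first_num == 1, bud == LAST and
 * last_chunks == 8: idx == (LAST + 1) & BUDDY_MASK == 0, so the slot value
 * is 0xffff888012345000 + 0 + (8 << 2) == 0xffff888012345020, from which
 * handle_to_buddy() and handle_to_chunks() below recover LAST and 8.
 */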
/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}

/*
 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
 * but that doesn't matter because the masking will result in the
 * correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	return (addr - zhdr->first_num) & BUDDY_MASK;
}

static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}

static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	spin_lock(&pool->lock);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);

	if (locked)
		z3fold_page_unlock(zhdr);

	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);

	atomic64_dec(&pool->pages_nr);
}

static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
					       refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		spin_unlock(&pool->stale_lock);
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}

/*
 * Returns the number of free chunks in a z3fold page.
 * NB: can't be used with HEADLESS pages.
 */
static int num_free_chunks(struct z3fold_header *zhdr)
{
	int nfree;
	/*
	 * If there is a middle object, pick up the bigger free space
	 * either before or after it. Otherwise just subtract the number
	 * of chunks occupied by the first and the last objects.
	 */
	if (zhdr->middle_chunks != 0) {
		int nfree_before = zhdr->first_chunks ?
			0 : zhdr->start_middle - ZHDR_CHUNKS;
		int nfree_after = zhdr->last_chunks ?
			0 : TOTAL_CHUNKS -
				(zhdr->start_middle + zhdr->middle_chunks);
		nfree = max(nfree_before, nfree_after);
	} else
		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
	return nfree;
}
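/*
 * Worked example for num_free_chunks() above (illustrative, assuming
 * TOTAL_CHUNKS == 64 and ZHDR_CHUNKS == 1): for a page with the first
 * buddy free, a middle buddy at start_middle == 30 and an allocated last
 * buddy, nfree_before == 30 - 1 == 29 and nfree_after is taken as 0
 * because the last buddy is in use, so the page is filed in unbuddied
 * list 29. Only the larger of the gaps around the middle buddy is
 * counted, never their sum.
 */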
/* Add to the appropriate unbuddied list */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		struct list_head *unbuddied;
		int freechunks = num_free_chunks(zhdr);

		migrate_disable();
		unbuddied = this_cpu_ptr(pool->unbuddied);
		spin_lock(&pool->lock);
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		zhdr->cpu = smp_processor_id();
		migrate_enable();
	}
}

static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks)
{
	enum buddy bud = HEADLESS;

	if (zhdr->middle_chunks) {
		if (!zhdr->first_chunks &&
		    chunks <= zhdr->start_middle - ZHDR_CHUNKS)
			bud = FIRST;
		else if (!zhdr->last_chunks)
			bud = LAST;
	} else {
		if (!zhdr->first_chunks)
			bud = FIRST;
		else if (!zhdr->last_chunks)
			bud = LAST;
		else
			bud = MIDDLE;
	}

	return bud;
}

static inline void *mchunk_memmove(struct z3fold_header *zhdr,
				unsigned short dst_chunk)
{
	void *beg = zhdr;
	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
		       beg + (zhdr->start_middle << CHUNK_SHIFT),
		       zhdr->middle_chunks << CHUNK_SHIFT);
}

static inline bool buddy_single(struct z3fold_header *zhdr)
{
	return !((zhdr->first_chunks && zhdr->middle_chunks) ||
			(zhdr->first_chunks && zhdr->last_chunks) ||
			(zhdr->middle_chunks && zhdr->last_chunks));
}

static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	void *p = zhdr;
	unsigned long old_handle = 0;
	size_t sz = 0;
	struct z3fold_header *new_zhdr = NULL;
	int first_idx = __idx(zhdr, FIRST);
	int middle_idx = __idx(zhdr, MIDDLE);
	int last_idx = __idx(zhdr, LAST);
	unsigned short *moved_chunks = NULL;

	/*
	 * No need to protect slots here -- all the slots are "local" and
	 * the page lock is already taken
	 */
	if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) {
		p += ZHDR_SIZE_ALIGNED;
		sz = zhdr->first_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[first_idx];
		moved_chunks = &zhdr->first_chunks;
	} else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) {
		p += zhdr->start_middle << CHUNK_SHIFT;
		sz = zhdr->middle_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[middle_idx];
		moved_chunks = &zhdr->middle_chunks;
	} else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) {
		p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
		sz = zhdr->last_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[last_idx];
		moved_chunks = &zhdr->last_chunks;
	}

	if (sz > 0) {
		enum buddy new_bud = HEADLESS;
		short chunks = size_to_chunks(sz);
		void *q;

		new_zhdr = __z3fold_alloc(pool, sz, false);
		if (!new_zhdr)
			return NULL;

		if (WARN_ON(new_zhdr == zhdr))
			goto out_fail;

		new_bud = get_free_buddy(new_zhdr, chunks);
		q = new_zhdr;
		switch (new_bud) {
		case FIRST:
			new_zhdr->first_chunks = chunks;
			q += ZHDR_SIZE_ALIGNED;
			break;
		case MIDDLE:
			new_zhdr->middle_chunks = chunks;
			new_zhdr->start_middle =
				new_zhdr->first_chunks + ZHDR_CHUNKS;
			q += new_zhdr->start_middle << CHUNK_SHIFT;
			break;
		case LAST:
			new_zhdr->last_chunks = chunks;
			q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT);
			break;
		default:
			goto out_fail;
		}
		new_zhdr->foreign_handles++;
		memcpy(q, p, sz);
		write_lock(&zhdr->slots->lock);
		*(unsigned long *)old_handle = (unsigned long)new_zhdr +
			__idx(new_zhdr, new_bud);
		if (new_bud == LAST)
			*(unsigned long *)old_handle |=
					(new_zhdr->last_chunks << BUDDY_SHIFT);
		write_unlock(&zhdr->slots->lock);
		add_to_unbuddied(pool, new_zhdr);
		z3fold_page_unlock(new_zhdr);

		*moved_chunks = 0;
	}

	return new_zhdr;

out_fail:
	if (new_zhdr && !kref_put(&new_zhdr->refcount, release_z3fold_page_locked)) {
		add_to_unbuddied(pool, new_zhdr);
		z3fold_page_unlock(new_zhdr);
	}
	return NULL;

}

#define BIG_CHUNK_GAP	3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
		return 0; /* can't move middle chunk, it's used */

	if (unlikely(PageIsolated(page)))
		return 0;

	if (zhdr->middle_chunks == 0)
		return 0; /* nothing to compact */

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* move to the beginning */
		mchunk_memmove(zhdr, ZHDR_CHUNKS);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		zhdr->first_num++;
		return 1;
	}

	/*
	 * moving data is expensive, so let's only do that if
	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
	 */
	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}

static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	if (kref_put(&zhdr->refcount, release_z3fold_page_locked))
		return;

	if (test_bit(PAGE_STALE, &page->private) ||
	    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}

	if (!zhdr->foreign_handles && buddy_single(zhdr) &&
	    zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) {
		if (!kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
			clear_bit(PAGE_CLAIMED, &page->private);
			z3fold_page_unlock(zhdr);
		}
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}

static void compact_page_work(struct work_struct *w)
{
	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
						work);

	do_compact_page(zhdr, false);
}

/* returns _locked_ z3fold page header or NULL */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	migrate_disable();
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = this_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		zhdr = list_first_entry_or_null(READ_ONCE(l),
					struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
						struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			spin_unlock(&pool->lock);
			zhdr = NULL;
			migrate_enable();
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		if (test_bit(NEEDS_COMPACTING, &page->private) ||
		    test_bit(PAGE_CLAIMED, &page->private)) {
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			migrate_enable();
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	migrate_enable();

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
						struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private) ||
			    test_bit(PAGE_CLAIMED, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	if (zhdr && !zhdr->slots) {
		zhdr->slots = alloc_slots(pool, GFP_ATOMIC);
		if (!zhdr->slots)
			goto out_fail;
	}
	return zhdr;

out_fail:
	if (!kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		add_to_unbuddied(pool, zhdr);
		z3fold_page_unlock(zhdr);
	}
	return NULL;
}
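/*
 * Note on the lookup order in __z3fold_alloc() above: the current CPU's
 * unbuddied lists are scanned first, starting at the requested size class
 * and moving towards larger free regions; only if nothing suitable is
 * found there do we look for an exact-size match on the other CPUs'
 * lists, which keeps the common allocation path CPU-local.
 */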
/*
 * API Functions
 */

/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 * @ops:	user-defined operations for the z3fold pool
 *
 * Return: pointer to the new z3fold pool or NULL if the metadata allocation
 * failed.
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
		const struct z3fold_ops *ops)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head) * NCHUNKS,
					 __alignof__(struct list_head));
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->lru);
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	INIT_WORK(&pool->work, free_pages_work);
	pool->ops = ops;
	return pool;

out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}

/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool:	the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);

	/*
	 * We need to destroy pool->compact_wq before pool->release_wq,
	 * as any pending work on pool->compact_wq will call
	 * queue_work(pool->release_wq, &pool->work).
	 *
	 * There are still outstanding pages until both workqueues are drained,
	 * so we cannot unregister migration until then.
	 */

	destroy_workqueue(pool->compact_wq);
	destroy_workqueue(pool->release_wq);
	free_percpu(pool->unbuddied);
	kfree(pool);
}

static const struct movable_operations z3fold_mops;

/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request. A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
 * a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size || (gfp & __GFP_HIGHMEM))
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			bud = get_free_buddy(zhdr, chunks);
			if (bud == HEADLESS) {
				if (!kref_put(&zhdr->refcount,
					     release_z3fold_page_locked))
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = alloc_page(gfp);
	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, &z3fold_mops);
		unlock_page(page);
	} else {
		WARN_ON(!trylock_page(page));
		__SetPageMovable(page, &z3fold_mops);
		unlock_page(page);
	}
	z3fold_page_lock(zhdr);

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	/* Add/move z3fold page to beginning of LRU */
	if (!list_empty(&page->lru))
		list_del(&page->lru);

	list_add(&page->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}
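/*
 * Example of the headless cutoff in z3fold_alloc() above (illustrative
 * numbers): with 4K pages, a one-chunk (64-byte) header and 64-byte
 * chunks, any request larger than 4096 - 64 - 64 = 3968 bytes leaves no
 * room for the header plus another buddy, so it is stored "headless" in a
 * page of its own. The exact cutoff depends on PAGE_SIZE and the real
 * header size.
 */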
/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is under
 * reclaim, as indicated by the PAGE_CLAIMED flag being set, this function
 * only sets the first|middle|last_chunks to 0. The page is actually freed
 * once all buddies are evicted (see z3fold_reclaim_page() below).
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;
	bool page_claimed;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);
	page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!page_claimed) {
			spin_lock(&pool->lock);
			list_del(&page->lru);
			spin_unlock(&pool->lock);
			put_z3fold_header(zhdr);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	bud = handle_to_buddy(handle);

	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		put_z3fold_header(zhdr);
		return;
	}

	if (!page_claimed)
		free_handle(handle, zhdr);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list))
		return;
	if (page_claimed) {
		/* the page has not been claimed by us */
		put_z3fold_header(zhdr);
		return;
	}
	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		clear_bit(PAGE_CLAIMED, &page->private);
		put_z3fold_header(zhdr);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		clear_bit(PAGE_CLAIMED, &page->private);
		do_compact_page(zhdr, true);
		return;
	}
	kref_get(&zhdr->refcount);
	clear_bit(PAGE_CLAIMED, &page->private);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	put_z3fold_header(zhdr);
}

/**
 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool:	pool from which a page will attempt to be evicted
 * @retries:	number of pages on the LRU list for which eviction will
 *		be attempted before failing
 *
 * z3fold reclaim is different from normal system reclaim in that it is done
 * from the bottom, up. This is because only the bottom layer, z3fold, has
 * information on how the allocations are organized within each z3fold page.
 * This has the potential to create interesting locking situations between
 * z3fold and the user, however.
 *
 * To avoid these, this is how z3fold_reclaim_page() should be called:
 *
 * The user detects a page should be reclaimed and calls z3fold_reclaim_page().
 * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and
 * call the user-defined eviction handler with the pool and handle as
 * arguments.
 *
 * If the handle can not be evicted, the eviction handler should return
 * non-zero. z3fold_reclaim_page() will add the z3fold page back to the
 * appropriate list and try the next z3fold page on the LRU up to
 * a user defined number of retries.
 *
 * If the handle is successfully evicted, the eviction handler should
 * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free()
 * contains logic to delay freeing the page if the page is under reclaim,
 * as indicated by the setting of the PAGE_CLAIMED flag on the underlying page.
 *
 * If all buddies in the z3fold page are successfully evicted, then the
 * z3fold page can be freed.
 *
 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
 * no pages to evict or an eviction handler is not registered, -EAGAIN if
 * the retry limit was hit.
 */
static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
{
	int i, ret = -1;
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	struct list_head *pos;
	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
	struct z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN)));

	rwlock_init(&slots.lock);
	slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE);

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		list_for_each_prev(pos, &pool->lru) {
			page = list_entry(pos, struct page, lru);

			zhdr = page_address(page);
			if (test_bit(PAGE_HEADLESS, &page->private)) {
				/*
				 * For non-headless pages, we wait to do this
				 * until we have the page lock to avoid racing
				 * with __z3fold_alloc(). Headless pages don't
				 * have a lock (and __z3fold_alloc() will never
				 * see them), but we still need to test and set
				 * PAGE_CLAIMED to avoid racing with
				 * z3fold_free(), so just do it now before
				 * leaving the loop.
				 */
				if (test_and_set_bit(PAGE_CLAIMED, &page->private))
					continue;

				break;
			}

			if (!z3fold_page_trylock(zhdr)) {
				zhdr = NULL;
				continue; /* can't evict at this point */
			}

			/* test_and_set_bit is of course atomic, but we still
			 * need to do it under page lock, otherwise checking
			 * that bit in __z3fold_alloc wouldn't make sense
			 */
			if (zhdr->foreign_handles ||
			    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				continue; /* can't evict such page */
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			/* See comment in __z3fold_alloc. */
			kref_get(&zhdr->refcount);
			break;
		}

		if (!zhdr)
			break;

		list_del_init(&page->lru);
		spin_unlock(&pool->lock);

		if (!test_bit(PAGE_HEADLESS, &page->private)) {
			/*
			 * We need to encode the handles before unlocking, and
			 * use our local slots structure because z3fold_free
			 * can zero out zhdr->slots and we can't do much
			 * about that
			 */
			first_handle = 0;
			last_handle = 0;
			middle_handle = 0;
			memset(slots.slot, 0, sizeof(slots.slot));
			if (zhdr->first_chunks)
				first_handle = __encode_handle(zhdr, &slots,
								FIRST);
			if (zhdr->middle_chunks)
				middle_handle = __encode_handle(zhdr, &slots,
								MIDDLE);
			if (zhdr->last_chunks)
				last_handle = __encode_handle(zhdr, &slots,
								LAST);
			/*
			 * it's safe to unlock here because we hold a
			 * reference to this page
			 */
			z3fold_page_unlock(zhdr);
		} else {
			first_handle = encode_handle(zhdr, HEADLESS);
			last_handle = middle_handle = 0;
		}
		/* Issue the eviction callback(s) */
		if (middle_handle) {
			ret = pool->ops->evict(pool, middle_handle);
			if (ret)
				goto next;
		}
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
		}
next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				free_z3fold_page(page, true);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			clear_bit(PAGE_CLAIMED, &page->private);
		} else {
			struct z3fold_buddy_slots *slots = zhdr->slots;
			z3fold_page_lock(zhdr);
			if (kref_put(&zhdr->refcount,
					release_z3fold_page_locked)) {
				kmem_cache_free(pool->c_handle, slots);
				return 0;
			}
			/*
			 * if we are here, the page is still not completely
			 * free. Take the global pool lock then to be able
			 * to add it back to the lru list
			 */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			if (list_empty(&zhdr->buddy))
				add_to_unbuddied(pool, zhdr);
			clear_bit(PAGE_CLAIMED, &page->private);
			z3fold_page_unlock(zhdr);
		}

		/* We started off locked so we need to lock the pool back */
		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}

/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from handle and constructs the pointer to the
 * correct starting chunk within the page.
 *
 * Returns: a pointer to the mapped allocation
 */
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	if (addr)
		zhdr->mapped_count++;
out:
	put_z3fold_header(zhdr);
	return addr;
}

/**
 * z3fold_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */
static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return;

	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	zhdr->mapped_count--;
	put_z3fold_header(zhdr);
}

/**
 * z3fold_get_pool_size() - gets the z3fold pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool.
 */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}

static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
		goto out;

	if (test_and_set_bit(PAGE_CLAIMED, &page->private))
		goto out;
	pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);

	kref_get(&zhdr->refcount);
	z3fold_page_unlock(zhdr);
	return true;

out:
	z3fold_page_unlock(zhdr);
	return false;
}

static int z3fold_page_migrate(struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr))
		return -EAGAIN;
	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
		clear_bit(PAGE_CLAIMED, &page->private);
		z3fold_page_unlock(zhdr);
		return -EBUSY;
	}
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	set_bit(PAGE_MIGRATED, &page->private);
	z3fold_page_unlock(zhdr);
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	__ClearPageMovable(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	spin_lock(&pool->lock);
	list_add(&newpage->lru, &pool->lru);
	spin_unlock(&pool->lock);
	__SetPageMovable(newpage, &z3fold_mops);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	/* PAGE_CLAIMED and PAGE_MIGRATED are cleared now. */
	page->private = 0;
	put_page(page);
	return 0;
}

static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked))
		return;
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	spin_unlock(&pool->lock);
	if (list_empty(&zhdr->buddy))
		add_to_unbuddied(pool, zhdr);
	clear_bit(PAGE_CLAIMED, &page->private);
	z3fold_page_unlock(zhdr);
}

static const struct movable_operations z3fold_mops = {
	.isolate_page = z3fold_page_isolate,
	.migrate_page = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};

/*****************
 * zpool
 ****************/

static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
{
	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
		return pool->zpool_ops->evict(pool->zpool, handle);
	else
		return -ENOENT;
}

static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};

static void *z3fold_zpool_create(const char *name, gfp_t gfp,
			       const struct zpool_ops *zpool_ops,
			       struct zpool *zpool)
{
	struct z3fold_pool *pool;

	pool = z3fold_create_pool(name, gfp,
				zpool_ops ? &z3fold_zpool_ops : NULL);
	if (pool) {
		pool->zpool = zpool;
		pool->zpool_ops = zpool_ops;
	}
	return pool;
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}

static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}

static int z3fold_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	unsigned int total = 0;
	int ret = -EINVAL;

	while (total < pages) {
		ret = z3fold_reclaim_page(pool, 8);
		if (ret < 0)
			break;
		total++;
	}

	if (reclaimed)
		*reclaimed = total;

	return ret;
}

static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}

static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}

static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.sleep_mapped = true,
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.shrink =	z3fold_zpool_shrink,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_size =	z3fold_zpool_total_size,
};

MODULE_ALIAS("zpool-z3fold");
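/*
 * Typical use of this driver is indirect, through the generic zpool API
 * (see include/linux/zpool.h). A rough sketch, ignoring error handling;
 * "mypool" and my_zpool_ops are placeholders, and exact zpool function
 * signatures may differ between kernel versions:
 *
 *	struct zpool *zp = zpool_create_pool("z3fold", "mypool",
 *					     GFP_KERNEL, &my_zpool_ops);
 *	unsigned long handle;
 *	void *dst;
 *
 *	zpool_malloc(zp, len, GFP_KERNEL, &handle);
 *	dst = zpool_map_handle(zp, handle, ZPOOL_MM_WO);
 *	memcpy(dst, src, len);
 *	zpool_unmap_handle(zp, handle);
 *	...
 *	zpool_free(zp, handle);
 *	zpool_destroy_pool(zp);
 */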
static int __init init_z3fold(void)
{
	/*
	 * Make sure the z3fold header is not larger than the page size and
	 * that there is remaining space for the buddies.
	 */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE);
	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}

static void __exit exit_z3fold(void)
{
	zpool_unregister_driver(&z3fold_zpool_driver);
}

module_init(init_z3fold);
module_exit(exit_z3fold);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");