1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Memory Migration functionality - linux/mm/migrate.c 4 * 5 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter 6 * 7 * Page migration was first developed in the context of the memory hotplug 8 * project. The main authors of the migration code are: 9 * 10 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> 11 * Hirokazu Takahashi <taka@valinux.co.jp> 12 * Dave Hansen <haveblue@us.ibm.com> 13 * Christoph Lameter 14 */ 15 16 #include <linux/migrate.h> 17 #include <linux/export.h> 18 #include <linux/swap.h> 19 #include <linux/swapops.h> 20 #include <linux/pagemap.h> 21 #include <linux/buffer_head.h> 22 #include <linux/mm_inline.h> 23 #include <linux/nsproxy.h> 24 #include <linux/ksm.h> 25 #include <linux/rmap.h> 26 #include <linux/topology.h> 27 #include <linux/cpu.h> 28 #include <linux/cpuset.h> 29 #include <linux/writeback.h> 30 #include <linux/mempolicy.h> 31 #include <linux/vmalloc.h> 32 #include <linux/security.h> 33 #include <linux/backing-dev.h> 34 #include <linux/compaction.h> 35 #include <linux/syscalls.h> 36 #include <linux/compat.h> 37 #include <linux/hugetlb.h> 38 #include <linux/hugetlb_cgroup.h> 39 #include <linux/gfp.h> 40 #include <linux/pfn_t.h> 41 #include <linux/memremap.h> 42 #include <linux/userfaultfd_k.h> 43 #include <linux/balloon_compaction.h> 44 #include <linux/page_idle.h> 45 #include <linux/page_owner.h> 46 #include <linux/sched/mm.h> 47 #include <linux/ptrace.h> 48 #include <linux/oom.h> 49 #include <linux/memory.h> 50 #include <linux/random.h> 51 #include <linux/sched/sysctl.h> 52 #include <linux/memory-tiers.h> 53 #include <linux/pagewalk.h> 54 55 #include <asm/tlbflush.h> 56 57 #include <trace/events/migrate.h> 58 59 #include "internal.h" 60 61 bool isolate_movable_page(struct page *page, isolate_mode_t mode) 62 { 63 struct folio *folio = folio_get_nontail_page(page); 64 const struct movable_operations *mops; 65 66 /* 67 * Avoid burning cycles with pages that are yet under __free_pages(), 68 * or just got freed under us. 69 * 70 * In case we 'win' a race for a movable page being freed under us and 71 * raise its refcount preventing __free_pages() from doing its job 72 * the put_page() at the end of this block will take care of 73 * release this page, thus avoiding a nasty leakage. 74 */ 75 if (!folio) 76 goto out; 77 78 if (unlikely(folio_test_slab(folio))) 79 goto out_putfolio; 80 /* Pairs with smp_wmb() in slab freeing, e.g. SLUB's __free_slab() */ 81 smp_rmb(); 82 /* 83 * Check movable flag before taking the page lock because 84 * we use non-atomic bitops on newly allocated page flags so 85 * unconditionally grabbing the lock ruins page's owner side. 86 */ 87 if (unlikely(!__folio_test_movable(folio))) 88 goto out_putfolio; 89 /* Pairs with smp_wmb() in slab allocation, e.g. SLUB's alloc_slab_page() */ 90 smp_rmb(); 91 if (unlikely(folio_test_slab(folio))) 92 goto out_putfolio; 93 94 /* 95 * As movable pages are not isolated from LRU lists, concurrent 96 * compaction threads can race against page migration functions 97 * as well as race against the releasing a page. 98 * 99 * In order to avoid having an already isolated movable page 100 * being (wrongly) re-isolated while it is under migration, 101 * or to avoid attempting to isolate pages being released, 102 * lets be sure we have the page lock 103 * before proceeding with the movable page isolation steps. 104 */ 105 if (unlikely(!folio_trylock(folio))) 106 goto out_putfolio; 107 108 if (!folio_test_movable(folio) || folio_test_isolated(folio)) 109 goto out_no_isolated; 110 111 mops = folio_movable_ops(folio); 112 VM_BUG_ON_FOLIO(!mops, folio); 113 114 if (!mops->isolate_page(&folio->page, mode)) 115 goto out_no_isolated; 116 117 /* Driver shouldn't use the isolated flag */ 118 WARN_ON_ONCE(folio_test_isolated(folio)); 119 folio_set_isolated(folio); 120 folio_unlock(folio); 121 122 return true; 123 124 out_no_isolated: 125 folio_unlock(folio); 126 out_putfolio: 127 folio_put(folio); 128 out: 129 return false; 130 } 131 132 static void putback_movable_folio(struct folio *folio) 133 { 134 const struct movable_operations *mops = folio_movable_ops(folio); 135 136 mops->putback_page(&folio->page); 137 folio_clear_isolated(folio); 138 } 139 140 /* 141 * Put previously isolated pages back onto the appropriate lists 142 * from where they were once taken off for compaction/migration. 143 * 144 * This function shall be used whenever the isolated pageset has been 145 * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range() 146 * and isolate_hugetlb(). 147 */ 148 void putback_movable_pages(struct list_head *l) 149 { 150 struct folio *folio; 151 struct folio *folio2; 152 153 list_for_each_entry_safe(folio, folio2, l, lru) { 154 if (unlikely(folio_test_hugetlb(folio))) { 155 folio_putback_active_hugetlb(folio); 156 continue; 157 } 158 list_del(&folio->lru); 159 /* 160 * We isolated non-lru movable folio so here we can use 161 * __folio_test_movable because LRU folio's mapping cannot 162 * have PAGE_MAPPING_MOVABLE. 163 */ 164 if (unlikely(__folio_test_movable(folio))) { 165 VM_BUG_ON_FOLIO(!folio_test_isolated(folio), folio); 166 folio_lock(folio); 167 if (folio_test_movable(folio)) 168 putback_movable_folio(folio); 169 else 170 folio_clear_isolated(folio); 171 folio_unlock(folio); 172 folio_put(folio); 173 } else { 174 node_stat_mod_folio(folio, NR_ISOLATED_ANON + 175 folio_is_file_lru(folio), -folio_nr_pages(folio)); 176 folio_putback_lru(folio); 177 } 178 } 179 } 180 181 /* Must be called with an elevated refcount on the non-hugetlb folio */ 182 bool isolate_folio_to_list(struct folio *folio, struct list_head *list) 183 { 184 bool isolated, lru; 185 186 if (folio_test_hugetlb(folio)) 187 return isolate_hugetlb(folio, list); 188 189 lru = !__folio_test_movable(folio); 190 if (lru) 191 isolated = folio_isolate_lru(folio); 192 else 193 isolated = isolate_movable_page(&folio->page, 194 ISOLATE_UNEVICTABLE); 195 196 if (!isolated) 197 return false; 198 199 list_add(&folio->lru, list); 200 if (lru) 201 node_stat_add_folio(folio, NR_ISOLATED_ANON + 202 folio_is_file_lru(folio)); 203 204 return true; 205 } 206 207 static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, 208 struct folio *folio, 209 unsigned long idx) 210 { 211 struct page *page = folio_page(folio, idx); 212 bool contains_data; 213 pte_t newpte; 214 void *addr; 215 216 VM_BUG_ON_PAGE(PageCompound(page), page); 217 VM_BUG_ON_PAGE(!PageAnon(page), page); 218 VM_BUG_ON_PAGE(!PageLocked(page), page); 219 VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page); 220 221 if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) || 222 mm_forbids_zeropage(pvmw->vma->vm_mm)) 223 return false; 224 225 /* 226 * The pmd entry mapping the old thp was flushed and the pte mapping 227 * this subpage has been non present. If the subpage is only zero-filled 228 * then map it to the shared zeropage. 229 */ 230 addr = kmap_local_page(page); 231 contains_data = memchr_inv(addr, 0, PAGE_SIZE); 232 kunmap_local(addr); 233 234 if (contains_data) 235 return false; 236 237 newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address), 238 pvmw->vma->vm_page_prot)); 239 set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte); 240 241 dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio)); 242 return true; 243 } 244 245 struct rmap_walk_arg { 246 struct folio *folio; 247 bool map_unused_to_zeropage; 248 }; 249 250 /* 251 * Restore a potential migration pte to a working pte entry 252 */ 253 static bool remove_migration_pte(struct folio *folio, 254 struct vm_area_struct *vma, unsigned long addr, void *arg) 255 { 256 struct rmap_walk_arg *rmap_walk_arg = arg; 257 DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr, PVMW_SYNC | PVMW_MIGRATION); 258 259 while (page_vma_mapped_walk(&pvmw)) { 260 rmap_t rmap_flags = RMAP_NONE; 261 pte_t old_pte; 262 pte_t pte; 263 swp_entry_t entry; 264 struct page *new; 265 unsigned long idx = 0; 266 267 /* pgoff is invalid for ksm pages, but they are never large */ 268 if (folio_test_large(folio) && !folio_test_hugetlb(folio)) 269 idx = linear_page_index(vma, pvmw.address) - pvmw.pgoff; 270 new = folio_page(folio, idx); 271 272 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION 273 /* PMD-mapped THP migration entry */ 274 if (!pvmw.pte) { 275 VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) || 276 !folio_test_pmd_mappable(folio), folio); 277 remove_migration_pmd(&pvmw, new); 278 continue; 279 } 280 #endif 281 if (rmap_walk_arg->map_unused_to_zeropage && 282 try_to_map_unused_to_zeropage(&pvmw, folio, idx)) 283 continue; 284 285 folio_get(folio); 286 pte = mk_pte(new, READ_ONCE(vma->vm_page_prot)); 287 old_pte = ptep_get(pvmw.pte); 288 289 entry = pte_to_swp_entry(old_pte); 290 if (!is_migration_entry_young(entry)) 291 pte = pte_mkold(pte); 292 if (folio_test_dirty(folio) && is_migration_entry_dirty(entry)) 293 pte = pte_mkdirty(pte); 294 if (pte_swp_soft_dirty(old_pte)) 295 pte = pte_mksoft_dirty(pte); 296 else 297 pte = pte_clear_soft_dirty(pte); 298 299 if (is_writable_migration_entry(entry)) 300 pte = pte_mkwrite(pte, vma); 301 else if (pte_swp_uffd_wp(old_pte)) 302 pte = pte_mkuffd_wp(pte); 303 304 if (folio_test_anon(folio) && !is_readable_migration_entry(entry)) 305 rmap_flags |= RMAP_EXCLUSIVE; 306 307 if (unlikely(is_device_private_page(new))) { 308 if (pte_write(pte)) 309 entry = make_writable_device_private_entry( 310 page_to_pfn(new)); 311 else 312 entry = make_readable_device_private_entry( 313 page_to_pfn(new)); 314 pte = swp_entry_to_pte(entry); 315 if (pte_swp_soft_dirty(old_pte)) 316 pte = pte_swp_mksoft_dirty(pte); 317 if (pte_swp_uffd_wp(old_pte)) 318 pte = pte_swp_mkuffd_wp(pte); 319 } 320 321 #ifdef CONFIG_HUGETLB_PAGE 322 if (folio_test_hugetlb(folio)) { 323 struct hstate *h = hstate_vma(vma); 324 unsigned int shift = huge_page_shift(h); 325 unsigned long psize = huge_page_size(h); 326 327 pte = arch_make_huge_pte(pte, shift, vma->vm_flags); 328 if (folio_test_anon(folio)) 329 hugetlb_add_anon_rmap(folio, vma, pvmw.address, 330 rmap_flags); 331 else 332 hugetlb_add_file_rmap(folio); 333 set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte, 334 psize); 335 } else 336 #endif 337 { 338 if (folio_test_anon(folio)) 339 folio_add_anon_rmap_pte(folio, new, vma, 340 pvmw.address, rmap_flags); 341 else 342 folio_add_file_rmap_pte(folio, new, vma); 343 set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); 344 } 345 if (vma->vm_flags & VM_LOCKED) 346 mlock_drain_local(); 347 348 trace_remove_migration_pte(pvmw.address, pte_val(pte), 349 compound_order(new)); 350 351 /* No need to invalidate - it was non-present before */ 352 update_mmu_cache(vma, pvmw.address, pvmw.pte); 353 } 354 355 return true; 356 } 357 358 /* 359 * Get rid of all migration entries and replace them by 360 * references to the indicated page. 361 */ 362 void remove_migration_ptes(struct folio *src, struct folio *dst, int flags) 363 { 364 struct rmap_walk_arg rmap_walk_arg = { 365 .folio = src, 366 .map_unused_to_zeropage = flags & RMP_USE_SHARED_ZEROPAGE, 367 }; 368 369 struct rmap_walk_control rwc = { 370 .rmap_one = remove_migration_pte, 371 .arg = &rmap_walk_arg, 372 }; 373 374 VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src != dst), src); 375 376 if (flags & RMP_LOCKED) 377 rmap_walk_locked(dst, &rwc); 378 else 379 rmap_walk(dst, &rwc); 380 } 381 382 /* 383 * Something used the pte of a page under migration. We need to 384 * get to the page and wait until migration is finished. 385 * When we return from this function the fault will be retried. 386 */ 387 void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, 388 unsigned long address) 389 { 390 spinlock_t *ptl; 391 pte_t *ptep; 392 pte_t pte; 393 swp_entry_t entry; 394 395 ptep = pte_offset_map_lock(mm, pmd, address, &ptl); 396 if (!ptep) 397 return; 398 399 pte = ptep_get(ptep); 400 pte_unmap(ptep); 401 402 if (!is_swap_pte(pte)) 403 goto out; 404 405 entry = pte_to_swp_entry(pte); 406 if (!is_migration_entry(entry)) 407 goto out; 408 409 migration_entry_wait_on_locked(entry, ptl); 410 return; 411 out: 412 spin_unlock(ptl); 413 } 414 415 #ifdef CONFIG_HUGETLB_PAGE 416 /* 417 * The vma read lock must be held upon entry. Holding that lock prevents either 418 * the pte or the ptl from being freed. 419 * 420 * This function will release the vma lock before returning. 421 */ 422 void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) 423 { 424 spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), vma->vm_mm, ptep); 425 pte_t pte; 426 427 hugetlb_vma_assert_locked(vma); 428 spin_lock(ptl); 429 pte = huge_ptep_get(vma->vm_mm, addr, ptep); 430 431 if (unlikely(!is_hugetlb_entry_migration(pte))) { 432 spin_unlock(ptl); 433 hugetlb_vma_unlock_read(vma); 434 } else { 435 /* 436 * If migration entry existed, safe to release vma lock 437 * here because the pgtable page won't be freed without the 438 * pgtable lock released. See comment right above pgtable 439 * lock release in migration_entry_wait_on_locked(). 440 */ 441 hugetlb_vma_unlock_read(vma); 442 migration_entry_wait_on_locked(pte_to_swp_entry(pte), ptl); 443 } 444 } 445 #endif 446 447 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION 448 void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) 449 { 450 spinlock_t *ptl; 451 452 ptl = pmd_lock(mm, pmd); 453 if (!is_pmd_migration_entry(*pmd)) 454 goto unlock; 455 migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), ptl); 456 return; 457 unlock: 458 spin_unlock(ptl); 459 } 460 #endif 461 462 static int folio_expected_refs(struct address_space *mapping, 463 struct folio *folio) 464 { 465 int refs = 1; 466 if (!mapping) 467 return refs; 468 469 refs += folio_nr_pages(folio); 470 if (folio_test_private(folio)) 471 refs++; 472 473 return refs; 474 } 475 476 /* 477 * Replace the folio in the mapping. 478 * 479 * The number of remaining references must be: 480 * 1 for anonymous folios without a mapping 481 * 2 for folios with a mapping 482 * 3 for folios with a mapping and PagePrivate/PagePrivate2 set. 483 */ 484 static int __folio_migrate_mapping(struct address_space *mapping, 485 struct folio *newfolio, struct folio *folio, int expected_count) 486 { 487 XA_STATE(xas, &mapping->i_pages, folio_index(folio)); 488 struct zone *oldzone, *newzone; 489 int dirty; 490 long nr = folio_nr_pages(folio); 491 long entries, i; 492 493 if (!mapping) { 494 /* Take off deferred split queue while frozen and memcg set */ 495 if (folio_test_large(folio) && 496 folio_test_large_rmappable(folio)) { 497 if (!folio_ref_freeze(folio, expected_count)) 498 return -EAGAIN; 499 folio_undo_large_rmappable(folio); 500 folio_ref_unfreeze(folio, expected_count); 501 } 502 503 /* No turning back from here */ 504 newfolio->index = folio->index; 505 newfolio->mapping = folio->mapping; 506 if (folio_test_anon(folio) && folio_test_large(folio)) 507 mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1); 508 if (folio_test_swapbacked(folio)) 509 __folio_set_swapbacked(newfolio); 510 511 return MIGRATEPAGE_SUCCESS; 512 } 513 514 oldzone = folio_zone(folio); 515 newzone = folio_zone(newfolio); 516 517 xas_lock_irq(&xas); 518 if (!folio_ref_freeze(folio, expected_count)) { 519 xas_unlock_irq(&xas); 520 return -EAGAIN; 521 } 522 523 /* Take off deferred split queue while frozen and memcg set */ 524 folio_undo_large_rmappable(folio); 525 526 /* 527 * Now we know that no one else is looking at the folio: 528 * no turning back from here. 529 */ 530 newfolio->index = folio->index; 531 newfolio->mapping = folio->mapping; 532 if (folio_test_anon(folio) && folio_test_large(folio)) 533 mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1); 534 folio_ref_add(newfolio, nr); /* add cache reference */ 535 if (folio_test_swapbacked(folio)) { 536 __folio_set_swapbacked(newfolio); 537 if (folio_test_swapcache(folio)) { 538 folio_set_swapcache(newfolio); 539 newfolio->private = folio_get_private(folio); 540 } 541 entries = nr; 542 } else { 543 VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); 544 entries = 1; 545 } 546 547 /* Move dirty while folio refs frozen and newfolio not yet exposed */ 548 dirty = folio_test_dirty(folio); 549 if (dirty) { 550 folio_clear_dirty(folio); 551 folio_set_dirty(newfolio); 552 } 553 554 /* Swap cache still stores N entries instead of a high-order entry */ 555 for (i = 0; i < entries; i++) { 556 xas_store(&xas, newfolio); 557 xas_next(&xas); 558 } 559 560 /* 561 * Drop cache reference from old folio by unfreezing 562 * to one less reference. 563 * We know this isn't the last reference. 564 */ 565 folio_ref_unfreeze(folio, expected_count - nr); 566 567 xas_unlock(&xas); 568 /* Leave irq disabled to prevent preemption while updating stats */ 569 570 /* 571 * If moved to a different zone then also account 572 * the folio for that zone. Other VM counters will be 573 * taken care of when we establish references to the 574 * new folio and drop references to the old folio. 575 * 576 * Note that anonymous folios are accounted for 577 * via NR_FILE_PAGES and NR_ANON_MAPPED if they 578 * are mapped to swap space. 579 */ 580 if (newzone != oldzone) { 581 struct lruvec *old_lruvec, *new_lruvec; 582 struct mem_cgroup *memcg; 583 584 memcg = folio_memcg(folio); 585 old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat); 586 new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat); 587 588 __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr); 589 __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr); 590 if (folio_test_swapbacked(folio) && !folio_test_swapcache(folio)) { 591 __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); 592 __mod_lruvec_state(new_lruvec, NR_SHMEM, nr); 593 594 if (folio_test_pmd_mappable(folio)) { 595 __mod_lruvec_state(old_lruvec, NR_SHMEM_THPS, -nr); 596 __mod_lruvec_state(new_lruvec, NR_SHMEM_THPS, nr); 597 } 598 } 599 #ifdef CONFIG_SWAP 600 if (folio_test_swapcache(folio)) { 601 __mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr); 602 __mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr); 603 } 604 #endif 605 if (dirty && mapping_can_writeback(mapping)) { 606 __mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr); 607 __mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr); 608 __mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr); 609 __mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr); 610 } 611 } 612 local_irq_enable(); 613 614 return MIGRATEPAGE_SUCCESS; 615 } 616 617 int folio_migrate_mapping(struct address_space *mapping, 618 struct folio *newfolio, struct folio *folio, int extra_count) 619 { 620 int expected_count = folio_expected_refs(mapping, folio) + extra_count; 621 622 if (folio_ref_count(folio) != expected_count) 623 return -EAGAIN; 624 625 return __folio_migrate_mapping(mapping, newfolio, folio, expected_count); 626 } 627 EXPORT_SYMBOL(folio_migrate_mapping); 628 629 /* 630 * The expected number of remaining references is the same as that 631 * of folio_migrate_mapping(). 632 */ 633 int migrate_huge_page_move_mapping(struct address_space *mapping, 634 struct folio *dst, struct folio *src) 635 { 636 XA_STATE(xas, &mapping->i_pages, folio_index(src)); 637 int rc, expected_count = folio_expected_refs(mapping, src); 638 639 if (folio_ref_count(src) != expected_count) 640 return -EAGAIN; 641 642 rc = folio_mc_copy(dst, src); 643 if (unlikely(rc)) 644 return rc; 645 646 xas_lock_irq(&xas); 647 if (!folio_ref_freeze(src, expected_count)) { 648 xas_unlock_irq(&xas); 649 return -EAGAIN; 650 } 651 652 dst->index = src->index; 653 dst->mapping = src->mapping; 654 655 folio_ref_add(dst, folio_nr_pages(dst)); 656 657 xas_store(&xas, dst); 658 659 folio_ref_unfreeze(src, expected_count - folio_nr_pages(src)); 660 661 xas_unlock_irq(&xas); 662 663 return MIGRATEPAGE_SUCCESS; 664 } 665 666 /* 667 * Copy the flags and some other ancillary information 668 */ 669 void folio_migrate_flags(struct folio *newfolio, struct folio *folio) 670 { 671 int cpupid; 672 673 if (folio_test_referenced(folio)) 674 folio_set_referenced(newfolio); 675 if (folio_test_uptodate(folio)) 676 folio_mark_uptodate(newfolio); 677 if (folio_test_clear_active(folio)) { 678 VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); 679 folio_set_active(newfolio); 680 } else if (folio_test_clear_unevictable(folio)) 681 folio_set_unevictable(newfolio); 682 if (folio_test_workingset(folio)) 683 folio_set_workingset(newfolio); 684 if (folio_test_checked(folio)) 685 folio_set_checked(newfolio); 686 /* 687 * PG_anon_exclusive (-> PG_mappedtodisk) is always migrated via 688 * migration entries. We can still have PG_anon_exclusive set on an 689 * effectively unmapped and unreferenced first sub-pages of an 690 * anonymous THP: we can simply copy it here via PG_mappedtodisk. 691 */ 692 if (folio_test_mappedtodisk(folio)) 693 folio_set_mappedtodisk(newfolio); 694 695 /* Move dirty on pages not done by folio_migrate_mapping() */ 696 if (folio_test_dirty(folio)) 697 folio_set_dirty(newfolio); 698 699 if (folio_test_young(folio)) 700 folio_set_young(newfolio); 701 if (folio_test_idle(folio)) 702 folio_set_idle(newfolio); 703 704 /* 705 * Copy NUMA information to the new page, to prevent over-eager 706 * future migrations of this same page. 707 */ 708 cpupid = folio_xchg_last_cpupid(folio, -1); 709 /* 710 * For memory tiering mode, when migrate between slow and fast 711 * memory node, reset cpupid, because that is used to record 712 * page access time in slow memory node. 713 */ 714 if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) { 715 bool f_toptier = node_is_toptier(folio_nid(folio)); 716 bool t_toptier = node_is_toptier(folio_nid(newfolio)); 717 718 if (f_toptier != t_toptier) 719 cpupid = -1; 720 } 721 folio_xchg_last_cpupid(newfolio, cpupid); 722 723 folio_migrate_ksm(newfolio, folio); 724 /* 725 * Please do not reorder this without considering how mm/ksm.c's 726 * ksm_get_folio() depends upon ksm_migrate_page() and the 727 * swapcache flag. 728 */ 729 if (folio_test_swapcache(folio)) 730 folio_clear_swapcache(folio); 731 folio_clear_private(folio); 732 733 /* page->private contains hugetlb specific flags */ 734 if (!folio_test_hugetlb(folio)) 735 folio->private = NULL; 736 737 /* 738 * If any waiters have accumulated on the new page then 739 * wake them up. 740 */ 741 if (folio_test_writeback(newfolio)) 742 folio_end_writeback(newfolio); 743 744 /* 745 * PG_readahead shares the same bit with PG_reclaim. The above 746 * end_page_writeback() may clear PG_readahead mistakenly, so set the 747 * bit after that. 748 */ 749 if (folio_test_readahead(folio)) 750 folio_set_readahead(newfolio); 751 752 folio_copy_owner(newfolio, folio); 753 754 mem_cgroup_migrate(folio, newfolio); 755 } 756 EXPORT_SYMBOL(folio_migrate_flags); 757 758 /************************************************************ 759 * Migration functions 760 ***********************************************************/ 761 762 static int __migrate_folio(struct address_space *mapping, struct folio *dst, 763 struct folio *src, void *src_private, 764 enum migrate_mode mode) 765 { 766 int rc, expected_count = folio_expected_refs(mapping, src); 767 768 /* Check whether src does not have extra refs before we do more work */ 769 if (folio_ref_count(src) != expected_count) 770 return -EAGAIN; 771 772 rc = folio_mc_copy(dst, src); 773 if (unlikely(rc)) 774 return rc; 775 776 rc = __folio_migrate_mapping(mapping, dst, src, expected_count); 777 if (rc != MIGRATEPAGE_SUCCESS) 778 return rc; 779 780 if (src_private) 781 folio_attach_private(dst, folio_detach_private(src)); 782 783 folio_migrate_flags(dst, src); 784 return MIGRATEPAGE_SUCCESS; 785 } 786 787 /** 788 * migrate_folio() - Simple folio migration. 789 * @mapping: The address_space containing the folio. 790 * @dst: The folio to migrate the data to. 791 * @src: The folio containing the current data. 792 * @mode: How to migrate the page. 793 * 794 * Common logic to directly migrate a single LRU folio suitable for 795 * folios that do not use PagePrivate/PagePrivate2. 796 * 797 * Folios are locked upon entry and exit. 798 */ 799 int migrate_folio(struct address_space *mapping, struct folio *dst, 800 struct folio *src, enum migrate_mode mode) 801 { 802 BUG_ON(folio_test_writeback(src)); /* Writeback must be complete */ 803 return __migrate_folio(mapping, dst, src, NULL, mode); 804 } 805 EXPORT_SYMBOL(migrate_folio); 806 807 #ifdef CONFIG_BUFFER_HEAD 808 /* Returns true if all buffers are successfully locked */ 809 static bool buffer_migrate_lock_buffers(struct buffer_head *head, 810 enum migrate_mode mode) 811 { 812 struct buffer_head *bh = head; 813 struct buffer_head *failed_bh; 814 815 do { 816 if (!trylock_buffer(bh)) { 817 if (mode == MIGRATE_ASYNC) 818 goto unlock; 819 if (mode == MIGRATE_SYNC_LIGHT && !buffer_uptodate(bh)) 820 goto unlock; 821 lock_buffer(bh); 822 } 823 824 bh = bh->b_this_page; 825 } while (bh != head); 826 827 return true; 828 829 unlock: 830 /* We failed to lock the buffer and cannot stall. */ 831 failed_bh = bh; 832 bh = head; 833 while (bh != failed_bh) { 834 unlock_buffer(bh); 835 bh = bh->b_this_page; 836 } 837 838 return false; 839 } 840 841 static int __buffer_migrate_folio(struct address_space *mapping, 842 struct folio *dst, struct folio *src, enum migrate_mode mode, 843 bool check_refs) 844 { 845 struct buffer_head *bh, *head; 846 int rc; 847 int expected_count; 848 849 head = folio_buffers(src); 850 if (!head) 851 return migrate_folio(mapping, dst, src, mode); 852 853 /* Check whether page does not have extra refs before we do more work */ 854 expected_count = folio_expected_refs(mapping, src); 855 if (folio_ref_count(src) != expected_count) 856 return -EAGAIN; 857 858 if (!buffer_migrate_lock_buffers(head, mode)) 859 return -EAGAIN; 860 861 if (check_refs) { 862 bool busy; 863 bool invalidated = false; 864 865 recheck_buffers: 866 busy = false; 867 spin_lock(&mapping->i_private_lock); 868 bh = head; 869 do { 870 if (atomic_read(&bh->b_count)) { 871 busy = true; 872 break; 873 } 874 bh = bh->b_this_page; 875 } while (bh != head); 876 if (busy) { 877 if (invalidated) { 878 rc = -EAGAIN; 879 goto unlock_buffers; 880 } 881 spin_unlock(&mapping->i_private_lock); 882 invalidate_bh_lrus(); 883 invalidated = true; 884 goto recheck_buffers; 885 } 886 } 887 888 rc = filemap_migrate_folio(mapping, dst, src, mode); 889 if (rc != MIGRATEPAGE_SUCCESS) 890 goto unlock_buffers; 891 892 bh = head; 893 do { 894 folio_set_bh(bh, dst, bh_offset(bh)); 895 bh = bh->b_this_page; 896 } while (bh != head); 897 898 unlock_buffers: 899 if (check_refs) 900 spin_unlock(&mapping->i_private_lock); 901 bh = head; 902 do { 903 unlock_buffer(bh); 904 bh = bh->b_this_page; 905 } while (bh != head); 906 907 return rc; 908 } 909 910 /** 911 * buffer_migrate_folio() - Migration function for folios with buffers. 912 * @mapping: The address space containing @src. 913 * @dst: The folio to migrate to. 914 * @src: The folio to migrate from. 915 * @mode: How to migrate the folio. 916 * 917 * This function can only be used if the underlying filesystem guarantees 918 * that no other references to @src exist. For example attached buffer 919 * heads are accessed only under the folio lock. If your filesystem cannot 920 * provide this guarantee, buffer_migrate_folio_norefs() may be more 921 * appropriate. 922 * 923 * Return: 0 on success or a negative errno on failure. 924 */ 925 int buffer_migrate_folio(struct address_space *mapping, 926 struct folio *dst, struct folio *src, enum migrate_mode mode) 927 { 928 return __buffer_migrate_folio(mapping, dst, src, mode, false); 929 } 930 EXPORT_SYMBOL(buffer_migrate_folio); 931 932 /** 933 * buffer_migrate_folio_norefs() - Migration function for folios with buffers. 934 * @mapping: The address space containing @src. 935 * @dst: The folio to migrate to. 936 * @src: The folio to migrate from. 937 * @mode: How to migrate the folio. 938 * 939 * Like buffer_migrate_folio() except that this variant is more careful 940 * and checks that there are also no buffer head references. This function 941 * is the right one for mappings where buffer heads are directly looked 942 * up and referenced (such as block device mappings). 943 * 944 * Return: 0 on success or a negative errno on failure. 945 */ 946 int buffer_migrate_folio_norefs(struct address_space *mapping, 947 struct folio *dst, struct folio *src, enum migrate_mode mode) 948 { 949 return __buffer_migrate_folio(mapping, dst, src, mode, true); 950 } 951 EXPORT_SYMBOL_GPL(buffer_migrate_folio_norefs); 952 #endif /* CONFIG_BUFFER_HEAD */ 953 954 int filemap_migrate_folio(struct address_space *mapping, 955 struct folio *dst, struct folio *src, enum migrate_mode mode) 956 { 957 return __migrate_folio(mapping, dst, src, folio_get_private(src), mode); 958 } 959 EXPORT_SYMBOL_GPL(filemap_migrate_folio); 960 961 /* 962 * Writeback a folio to clean the dirty state 963 */ 964 static int writeout(struct address_space *mapping, struct folio *folio) 965 { 966 struct writeback_control wbc = { 967 .sync_mode = WB_SYNC_NONE, 968 .nr_to_write = 1, 969 .range_start = 0, 970 .range_end = LLONG_MAX, 971 .for_reclaim = 1 972 }; 973 int rc; 974 975 if (!mapping->a_ops->writepage) 976 /* No write method for the address space */ 977 return -EINVAL; 978 979 if (!folio_clear_dirty_for_io(folio)) 980 /* Someone else already triggered a write */ 981 return -EAGAIN; 982 983 /* 984 * A dirty folio may imply that the underlying filesystem has 985 * the folio on some queue. So the folio must be clean for 986 * migration. Writeout may mean we lose the lock and the 987 * folio state is no longer what we checked for earlier. 988 * At this point we know that the migration attempt cannot 989 * be successful. 990 */ 991 remove_migration_ptes(folio, folio, 0); 992 993 rc = mapping->a_ops->writepage(&folio->page, &wbc); 994 995 if (rc != AOP_WRITEPAGE_ACTIVATE) 996 /* unlocked. Relock */ 997 folio_lock(folio); 998 999 return (rc < 0) ? -EIO : -EAGAIN; 1000 } 1001 1002 /* 1003 * Default handling if a filesystem does not provide a migration function. 1004 */ 1005 static int fallback_migrate_folio(struct address_space *mapping, 1006 struct folio *dst, struct folio *src, enum migrate_mode mode) 1007 { 1008 if (folio_test_dirty(src)) { 1009 /* Only writeback folios in full synchronous migration */ 1010 switch (mode) { 1011 case MIGRATE_SYNC: 1012 break; 1013 default: 1014 return -EBUSY; 1015 } 1016 return writeout(mapping, src); 1017 } 1018 1019 /* 1020 * Buffers may be managed in a filesystem specific way. 1021 * We must have no buffers or drop them. 1022 */ 1023 if (!filemap_release_folio(src, GFP_KERNEL)) 1024 return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY; 1025 1026 return migrate_folio(mapping, dst, src, mode); 1027 } 1028 1029 /* 1030 * Move a page to a newly allocated page 1031 * The page is locked and all ptes have been successfully removed. 1032 * 1033 * The new page will have replaced the old page if this function 1034 * is successful. 1035 * 1036 * Return value: 1037 * < 0 - error code 1038 * MIGRATEPAGE_SUCCESS - success 1039 */ 1040 static int move_to_new_folio(struct folio *dst, struct folio *src, 1041 enum migrate_mode mode) 1042 { 1043 int rc = -EAGAIN; 1044 bool is_lru = !__folio_test_movable(src); 1045 1046 VM_BUG_ON_FOLIO(!folio_test_locked(src), src); 1047 VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst); 1048 1049 if (likely(is_lru)) { 1050 struct address_space *mapping = folio_mapping(src); 1051 1052 if (!mapping) 1053 rc = migrate_folio(mapping, dst, src, mode); 1054 else if (mapping_inaccessible(mapping)) 1055 rc = -EOPNOTSUPP; 1056 else if (mapping->a_ops->migrate_folio) 1057 /* 1058 * Most folios have a mapping and most filesystems 1059 * provide a migrate_folio callback. Anonymous folios 1060 * are part of swap space which also has its own 1061 * migrate_folio callback. This is the most common path 1062 * for page migration. 1063 */ 1064 rc = mapping->a_ops->migrate_folio(mapping, dst, src, 1065 mode); 1066 else 1067 rc = fallback_migrate_folio(mapping, dst, src, mode); 1068 } else { 1069 const struct movable_operations *mops; 1070 1071 /* 1072 * In case of non-lru page, it could be released after 1073 * isolation step. In that case, we shouldn't try migration. 1074 */ 1075 VM_BUG_ON_FOLIO(!folio_test_isolated(src), src); 1076 if (!folio_test_movable(src)) { 1077 rc = MIGRATEPAGE_SUCCESS; 1078 folio_clear_isolated(src); 1079 goto out; 1080 } 1081 1082 mops = folio_movable_ops(src); 1083 rc = mops->migrate_page(&dst->page, &src->page, mode); 1084 WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS && 1085 !folio_test_isolated(src)); 1086 } 1087 1088 /* 1089 * When successful, old pagecache src->mapping must be cleared before 1090 * src is freed; but stats require that PageAnon be left as PageAnon. 1091 */ 1092 if (rc == MIGRATEPAGE_SUCCESS) { 1093 if (__folio_test_movable(src)) { 1094 VM_BUG_ON_FOLIO(!folio_test_isolated(src), src); 1095 1096 /* 1097 * We clear PG_movable under page_lock so any compactor 1098 * cannot try to migrate this page. 1099 */ 1100 folio_clear_isolated(src); 1101 } 1102 1103 /* 1104 * Anonymous and movable src->mapping will be cleared by 1105 * free_pages_prepare so don't reset it here for keeping 1106 * the type to work PageAnon, for example. 1107 */ 1108 if (!folio_mapping_flags(src)) 1109 src->mapping = NULL; 1110 1111 if (likely(!folio_is_zone_device(dst))) 1112 flush_dcache_folio(dst); 1113 } 1114 out: 1115 return rc; 1116 } 1117 1118 /* 1119 * To record some information during migration, we use unused private 1120 * field of struct folio of the newly allocated destination folio. 1121 * This is safe because nobody is using it except us. 1122 */ 1123 enum { 1124 PAGE_WAS_MAPPED = BIT(0), 1125 PAGE_WAS_MLOCKED = BIT(1), 1126 PAGE_OLD_STATES = PAGE_WAS_MAPPED | PAGE_WAS_MLOCKED, 1127 }; 1128 1129 static void __migrate_folio_record(struct folio *dst, 1130 int old_page_state, 1131 struct anon_vma *anon_vma) 1132 { 1133 dst->private = (void *)anon_vma + old_page_state; 1134 } 1135 1136 static void __migrate_folio_extract(struct folio *dst, 1137 int *old_page_state, 1138 struct anon_vma **anon_vmap) 1139 { 1140 unsigned long private = (unsigned long)dst->private; 1141 1142 *anon_vmap = (struct anon_vma *)(private & ~PAGE_OLD_STATES); 1143 *old_page_state = private & PAGE_OLD_STATES; 1144 dst->private = NULL; 1145 } 1146 1147 /* Restore the source folio to the original state upon failure */ 1148 static void migrate_folio_undo_src(struct folio *src, 1149 int page_was_mapped, 1150 struct anon_vma *anon_vma, 1151 bool locked, 1152 struct list_head *ret) 1153 { 1154 if (page_was_mapped) 1155 remove_migration_ptes(src, src, 0); 1156 /* Drop an anon_vma reference if we took one */ 1157 if (anon_vma) 1158 put_anon_vma(anon_vma); 1159 if (locked) 1160 folio_unlock(src); 1161 if (ret) 1162 list_move_tail(&src->lru, ret); 1163 } 1164 1165 /* Restore the destination folio to the original state upon failure */ 1166 static void migrate_folio_undo_dst(struct folio *dst, bool locked, 1167 free_folio_t put_new_folio, unsigned long private) 1168 { 1169 if (locked) 1170 folio_unlock(dst); 1171 if (put_new_folio) 1172 put_new_folio(dst, private); 1173 else 1174 folio_put(dst); 1175 } 1176 1177 /* Cleanup src folio upon migration success */ 1178 static void migrate_folio_done(struct folio *src, 1179 enum migrate_reason reason) 1180 { 1181 /* 1182 * Compaction can migrate also non-LRU pages which are 1183 * not accounted to NR_ISOLATED_*. They can be recognized 1184 * as __folio_test_movable 1185 */ 1186 if (likely(!__folio_test_movable(src))) 1187 mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + 1188 folio_is_file_lru(src), -folio_nr_pages(src)); 1189 1190 if (reason != MR_MEMORY_FAILURE) 1191 /* We release the page in page_handle_poison. */ 1192 folio_put(src); 1193 } 1194 1195 /* Obtain the lock on page, remove all ptes. */ 1196 static int migrate_folio_unmap(new_folio_t get_new_folio, 1197 free_folio_t put_new_folio, unsigned long private, 1198 struct folio *src, struct folio **dstp, enum migrate_mode mode, 1199 enum migrate_reason reason, struct list_head *ret) 1200 { 1201 struct folio *dst; 1202 int rc = -EAGAIN; 1203 int old_page_state = 0; 1204 struct anon_vma *anon_vma = NULL; 1205 bool is_lru = !__folio_test_movable(src); 1206 bool locked = false; 1207 bool dst_locked = false; 1208 1209 if (folio_ref_count(src) == 1) { 1210 /* Folio was freed from under us. So we are done. */ 1211 folio_clear_active(src); 1212 folio_clear_unevictable(src); 1213 /* free_pages_prepare() will clear PG_isolated. */ 1214 list_del(&src->lru); 1215 migrate_folio_done(src, reason); 1216 return MIGRATEPAGE_SUCCESS; 1217 } 1218 1219 dst = get_new_folio(src, private); 1220 if (!dst) 1221 return -ENOMEM; 1222 *dstp = dst; 1223 1224 dst->private = NULL; 1225 1226 if (!folio_trylock(src)) { 1227 if (mode == MIGRATE_ASYNC) 1228 goto out; 1229 1230 /* 1231 * It's not safe for direct compaction to call lock_page. 1232 * For example, during page readahead pages are added locked 1233 * to the LRU. Later, when the IO completes the pages are 1234 * marked uptodate and unlocked. However, the queueing 1235 * could be merging multiple pages for one bio (e.g. 1236 * mpage_readahead). If an allocation happens for the 1237 * second or third page, the process can end up locking 1238 * the same page twice and deadlocking. Rather than 1239 * trying to be clever about what pages can be locked, 1240 * avoid the use of lock_page for direct compaction 1241 * altogether. 1242 */ 1243 if (current->flags & PF_MEMALLOC) 1244 goto out; 1245 1246 /* 1247 * In "light" mode, we can wait for transient locks (eg 1248 * inserting a page into the page table), but it's not 1249 * worth waiting for I/O. 1250 */ 1251 if (mode == MIGRATE_SYNC_LIGHT && !folio_test_uptodate(src)) 1252 goto out; 1253 1254 folio_lock(src); 1255 } 1256 locked = true; 1257 if (folio_test_mlocked(src)) 1258 old_page_state |= PAGE_WAS_MLOCKED; 1259 1260 if (folio_test_writeback(src)) { 1261 /* 1262 * Only in the case of a full synchronous migration is it 1263 * necessary to wait for PageWriteback. In the async case, 1264 * the retry loop is too short and in the sync-light case, 1265 * the overhead of stalling is too much 1266 */ 1267 switch (mode) { 1268 case MIGRATE_SYNC: 1269 break; 1270 default: 1271 rc = -EBUSY; 1272 goto out; 1273 } 1274 folio_wait_writeback(src); 1275 } 1276 1277 /* 1278 * By try_to_migrate(), src->mapcount goes down to 0 here. In this case, 1279 * we cannot notice that anon_vma is freed while we migrate a page. 1280 * This get_anon_vma() delays freeing anon_vma pointer until the end 1281 * of migration. File cache pages are no problem because of page_lock() 1282 * File Caches may use write_page() or lock_page() in migration, then, 1283 * just care Anon page here. 1284 * 1285 * Only folio_get_anon_vma() understands the subtleties of 1286 * getting a hold on an anon_vma from outside one of its mms. 1287 * But if we cannot get anon_vma, then we won't need it anyway, 1288 * because that implies that the anon page is no longer mapped 1289 * (and cannot be remapped so long as we hold the page lock). 1290 */ 1291 if (folio_test_anon(src) && !folio_test_ksm(src)) 1292 anon_vma = folio_get_anon_vma(src); 1293 1294 /* 1295 * Block others from accessing the new page when we get around to 1296 * establishing additional references. We are usually the only one 1297 * holding a reference to dst at this point. We used to have a BUG 1298 * here if folio_trylock(dst) fails, but would like to allow for 1299 * cases where there might be a race with the previous use of dst. 1300 * This is much like races on refcount of oldpage: just don't BUG(). 1301 */ 1302 if (unlikely(!folio_trylock(dst))) 1303 goto out; 1304 dst_locked = true; 1305 1306 if (unlikely(!is_lru)) { 1307 __migrate_folio_record(dst, old_page_state, anon_vma); 1308 return MIGRATEPAGE_UNMAP; 1309 } 1310 1311 /* 1312 * Corner case handling: 1313 * 1. When a new swap-cache page is read into, it is added to the LRU 1314 * and treated as swapcache but it has no rmap yet. 1315 * Calling try_to_unmap() against a src->mapping==NULL page will 1316 * trigger a BUG. So handle it here. 1317 * 2. An orphaned page (see truncate_cleanup_page) might have 1318 * fs-private metadata. The page can be picked up due to memory 1319 * offlining. Everywhere else except page reclaim, the page is 1320 * invisible to the vm, so the page can not be migrated. So try to 1321 * free the metadata, so the page can be freed. 1322 */ 1323 if (!src->mapping) { 1324 if (folio_test_private(src)) { 1325 try_to_free_buffers(src); 1326 goto out; 1327 } 1328 } else if (folio_mapped(src)) { 1329 /* Establish migration ptes */ 1330 VM_BUG_ON_FOLIO(folio_test_anon(src) && 1331 !folio_test_ksm(src) && !anon_vma, src); 1332 try_to_migrate(src, mode == MIGRATE_ASYNC ? TTU_BATCH_FLUSH : 0); 1333 old_page_state |= PAGE_WAS_MAPPED; 1334 } 1335 1336 if (!folio_mapped(src)) { 1337 __migrate_folio_record(dst, old_page_state, anon_vma); 1338 return MIGRATEPAGE_UNMAP; 1339 } 1340 1341 out: 1342 /* 1343 * A folio that has not been unmapped will be restored to 1344 * right list unless we want to retry. 1345 */ 1346 if (rc == -EAGAIN) 1347 ret = NULL; 1348 1349 migrate_folio_undo_src(src, old_page_state & PAGE_WAS_MAPPED, 1350 anon_vma, locked, ret); 1351 migrate_folio_undo_dst(dst, dst_locked, put_new_folio, private); 1352 1353 return rc; 1354 } 1355 1356 /* Migrate the folio to the newly allocated folio in dst. */ 1357 static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, 1358 struct folio *src, struct folio *dst, 1359 enum migrate_mode mode, enum migrate_reason reason, 1360 struct list_head *ret) 1361 { 1362 int rc; 1363 int old_page_state = 0; 1364 struct anon_vma *anon_vma = NULL; 1365 bool is_lru = !__folio_test_movable(src); 1366 struct list_head *prev; 1367 1368 __migrate_folio_extract(dst, &old_page_state, &anon_vma); 1369 prev = dst->lru.prev; 1370 list_del(&dst->lru); 1371 1372 rc = move_to_new_folio(dst, src, mode); 1373 if (rc) 1374 goto out; 1375 1376 if (unlikely(!is_lru)) 1377 goto out_unlock_both; 1378 1379 /* 1380 * When successful, push dst to LRU immediately: so that if it 1381 * turns out to be an mlocked page, remove_migration_ptes() will 1382 * automatically build up the correct dst->mlock_count for it. 1383 * 1384 * We would like to do something similar for the old page, when 1385 * unsuccessful, and other cases when a page has been temporarily 1386 * isolated from the unevictable LRU: but this case is the easiest. 1387 */ 1388 folio_add_lru(dst); 1389 if (old_page_state & PAGE_WAS_MLOCKED) 1390 lru_add_drain(); 1391 1392 if (old_page_state & PAGE_WAS_MAPPED) 1393 remove_migration_ptes(src, dst, 0); 1394 1395 out_unlock_both: 1396 folio_unlock(dst); 1397 set_page_owner_migrate_reason(&dst->page, reason); 1398 /* 1399 * If migration is successful, decrease refcount of dst, 1400 * which will not free the page because new page owner increased 1401 * refcounter. 1402 */ 1403 folio_put(dst); 1404 1405 /* 1406 * A folio that has been migrated has all references removed 1407 * and will be freed. 1408 */ 1409 list_del(&src->lru); 1410 /* Drop an anon_vma reference if we took one */ 1411 if (anon_vma) 1412 put_anon_vma(anon_vma); 1413 folio_unlock(src); 1414 migrate_folio_done(src, reason); 1415 1416 return rc; 1417 out: 1418 /* 1419 * A folio that has not been migrated will be restored to 1420 * right list unless we want to retry. 1421 */ 1422 if (rc == -EAGAIN) { 1423 list_add(&dst->lru, prev); 1424 __migrate_folio_record(dst, old_page_state, anon_vma); 1425 return rc; 1426 } 1427 1428 migrate_folio_undo_src(src, old_page_state & PAGE_WAS_MAPPED, 1429 anon_vma, true, ret); 1430 migrate_folio_undo_dst(dst, true, put_new_folio, private); 1431 1432 return rc; 1433 } 1434 1435 /* 1436 * Counterpart of unmap_and_move_page() for hugepage migration. 1437 * 1438 * This function doesn't wait the completion of hugepage I/O 1439 * because there is no race between I/O and migration for hugepage. 1440 * Note that currently hugepage I/O occurs only in direct I/O 1441 * where no lock is held and PG_writeback is irrelevant, 1442 * and writeback status of all subpages are counted in the reference 1443 * count of the head page (i.e. if all subpages of a 2MB hugepage are 1444 * under direct I/O, the reference of the head page is 512 and a bit more.) 1445 * This means that when we try to migrate hugepage whose subpages are 1446 * doing direct I/O, some references remain after try_to_unmap() and 1447 * hugepage migration fails without data corruption. 1448 * 1449 * There is also no race when direct I/O is issued on the page under migration, 1450 * because then pte is replaced with migration swap entry and direct I/O code 1451 * will wait in the page fault for migration to complete. 1452 */ 1453 static int unmap_and_move_huge_page(new_folio_t get_new_folio, 1454 free_folio_t put_new_folio, unsigned long private, 1455 struct folio *src, int force, enum migrate_mode mode, 1456 int reason, struct list_head *ret) 1457 { 1458 struct folio *dst; 1459 int rc = -EAGAIN; 1460 int page_was_mapped = 0; 1461 struct anon_vma *anon_vma = NULL; 1462 struct address_space *mapping = NULL; 1463 1464 if (folio_ref_count(src) == 1) { 1465 /* page was freed from under us. So we are done. */ 1466 folio_putback_active_hugetlb(src); 1467 return MIGRATEPAGE_SUCCESS; 1468 } 1469 1470 dst = get_new_folio(src, private); 1471 if (!dst) 1472 return -ENOMEM; 1473 1474 if (!folio_trylock(src)) { 1475 if (!force) 1476 goto out; 1477 switch (mode) { 1478 case MIGRATE_SYNC: 1479 break; 1480 default: 1481 goto out; 1482 } 1483 folio_lock(src); 1484 } 1485 1486 /* 1487 * Check for pages which are in the process of being freed. Without 1488 * folio_mapping() set, hugetlbfs specific move page routine will not 1489 * be called and we could leak usage counts for subpools. 1490 */ 1491 if (hugetlb_folio_subpool(src) && !folio_mapping(src)) { 1492 rc = -EBUSY; 1493 goto out_unlock; 1494 } 1495 1496 if (folio_test_anon(src)) 1497 anon_vma = folio_get_anon_vma(src); 1498 1499 if (unlikely(!folio_trylock(dst))) 1500 goto put_anon; 1501 1502 if (folio_mapped(src)) { 1503 enum ttu_flags ttu = 0; 1504 1505 if (!folio_test_anon(src)) { 1506 /* 1507 * In shared mappings, try_to_unmap could potentially 1508 * call huge_pmd_unshare. Because of this, take 1509 * semaphore in write mode here and set TTU_RMAP_LOCKED 1510 * to let lower levels know we have taken the lock. 1511 */ 1512 mapping = hugetlb_folio_mapping_lock_write(src); 1513 if (unlikely(!mapping)) 1514 goto unlock_put_anon; 1515 1516 ttu = TTU_RMAP_LOCKED; 1517 } 1518 1519 try_to_migrate(src, ttu); 1520 page_was_mapped = 1; 1521 1522 if (ttu & TTU_RMAP_LOCKED) 1523 i_mmap_unlock_write(mapping); 1524 } 1525 1526 if (!folio_mapped(src)) 1527 rc = move_to_new_folio(dst, src, mode); 1528 1529 if (page_was_mapped) 1530 remove_migration_ptes(src, 1531 rc == MIGRATEPAGE_SUCCESS ? dst : src, 0); 1532 1533 unlock_put_anon: 1534 folio_unlock(dst); 1535 1536 put_anon: 1537 if (anon_vma) 1538 put_anon_vma(anon_vma); 1539 1540 if (rc == MIGRATEPAGE_SUCCESS) { 1541 move_hugetlb_state(src, dst, reason); 1542 put_new_folio = NULL; 1543 } 1544 1545 out_unlock: 1546 folio_unlock(src); 1547 out: 1548 if (rc == MIGRATEPAGE_SUCCESS) 1549 folio_putback_active_hugetlb(src); 1550 else if (rc != -EAGAIN) 1551 list_move_tail(&src->lru, ret); 1552 1553 /* 1554 * If migration was not successful and there's a freeing callback, use 1555 * it. Otherwise, put_page() will drop the reference grabbed during 1556 * isolation. 1557 */ 1558 if (put_new_folio) 1559 put_new_folio(dst, private); 1560 else 1561 folio_putback_active_hugetlb(dst); 1562 1563 return rc; 1564 } 1565 1566 static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, 1567 enum migrate_mode mode) 1568 { 1569 int rc; 1570 1571 if (mode == MIGRATE_ASYNC) { 1572 if (!folio_trylock(folio)) 1573 return -EAGAIN; 1574 } else { 1575 folio_lock(folio); 1576 } 1577 rc = split_folio_to_list(folio, split_folios); 1578 folio_unlock(folio); 1579 if (!rc) 1580 list_move_tail(&folio->lru, split_folios); 1581 1582 return rc; 1583 } 1584 1585 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1586 #define NR_MAX_BATCHED_MIGRATION HPAGE_PMD_NR 1587 #else 1588 #define NR_MAX_BATCHED_MIGRATION 512 1589 #endif 1590 #define NR_MAX_MIGRATE_PAGES_RETRY 10 1591 #define NR_MAX_MIGRATE_ASYNC_RETRY 3 1592 #define NR_MAX_MIGRATE_SYNC_RETRY \ 1593 (NR_MAX_MIGRATE_PAGES_RETRY - NR_MAX_MIGRATE_ASYNC_RETRY) 1594 1595 struct migrate_pages_stats { 1596 int nr_succeeded; /* Normal and large folios migrated successfully, in 1597 units of base pages */ 1598 int nr_failed_pages; /* Normal and large folios failed to be migrated, in 1599 units of base pages. Untried folios aren't counted */ 1600 int nr_thp_succeeded; /* THP migrated successfully */ 1601 int nr_thp_failed; /* THP failed to be migrated */ 1602 int nr_thp_split; /* THP split before migrating */ 1603 int nr_split; /* Large folio (include THP) split before migrating */ 1604 }; 1605 1606 /* 1607 * Returns the number of hugetlb folios that were not migrated, or an error code 1608 * after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no hugetlb folios are movable 1609 * any more because the list has become empty or no retryable hugetlb folios 1610 * exist any more. It is caller's responsibility to call putback_movable_pages() 1611 * only if ret != 0. 1612 */ 1613 static int migrate_hugetlbs(struct list_head *from, new_folio_t get_new_folio, 1614 free_folio_t put_new_folio, unsigned long private, 1615 enum migrate_mode mode, int reason, 1616 struct migrate_pages_stats *stats, 1617 struct list_head *ret_folios) 1618 { 1619 int retry = 1; 1620 int nr_failed = 0; 1621 int nr_retry_pages = 0; 1622 int pass = 0; 1623 struct folio *folio, *folio2; 1624 int rc, nr_pages; 1625 1626 for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && retry; pass++) { 1627 retry = 0; 1628 nr_retry_pages = 0; 1629 1630 list_for_each_entry_safe(folio, folio2, from, lru) { 1631 if (!folio_test_hugetlb(folio)) 1632 continue; 1633 1634 nr_pages = folio_nr_pages(folio); 1635 1636 cond_resched(); 1637 1638 /* 1639 * Migratability of hugepages depends on architectures and 1640 * their size. This check is necessary because some callers 1641 * of hugepage migration like soft offline and memory 1642 * hotremove don't walk through page tables or check whether 1643 * the hugepage is pmd-based or not before kicking migration. 1644 */ 1645 if (!hugepage_migration_supported(folio_hstate(folio))) { 1646 nr_failed++; 1647 stats->nr_failed_pages += nr_pages; 1648 list_move_tail(&folio->lru, ret_folios); 1649 continue; 1650 } 1651 1652 rc = unmap_and_move_huge_page(get_new_folio, 1653 put_new_folio, private, 1654 folio, pass > 2, mode, 1655 reason, ret_folios); 1656 /* 1657 * The rules are: 1658 * Success: hugetlb folio will be put back 1659 * -EAGAIN: stay on the from list 1660 * -ENOMEM: stay on the from list 1661 * Other errno: put on ret_folios list 1662 */ 1663 switch(rc) { 1664 case -ENOMEM: 1665 /* 1666 * When memory is low, don't bother to try to migrate 1667 * other folios, just exit. 1668 */ 1669 stats->nr_failed_pages += nr_pages + nr_retry_pages; 1670 return -ENOMEM; 1671 case -EAGAIN: 1672 retry++; 1673 nr_retry_pages += nr_pages; 1674 break; 1675 case MIGRATEPAGE_SUCCESS: 1676 stats->nr_succeeded += nr_pages; 1677 break; 1678 default: 1679 /* 1680 * Permanent failure (-EBUSY, etc.): 1681 * unlike -EAGAIN case, the failed folio is 1682 * removed from migration folio list and not 1683 * retried in the next outer loop. 1684 */ 1685 nr_failed++; 1686 stats->nr_failed_pages += nr_pages; 1687 break; 1688 } 1689 } 1690 } 1691 /* 1692 * nr_failed is number of hugetlb folios failed to be migrated. After 1693 * NR_MAX_MIGRATE_PAGES_RETRY attempts, give up and count retried hugetlb 1694 * folios as failed. 1695 */ 1696 nr_failed += retry; 1697 stats->nr_failed_pages += nr_retry_pages; 1698 1699 return nr_failed; 1700 } 1701 1702 /* 1703 * migrate_pages_batch() first unmaps folios in the from list as many as 1704 * possible, then move the unmapped folios. 1705 * 1706 * We only batch migration if mode == MIGRATE_ASYNC to avoid to wait a 1707 * lock or bit when we have locked more than one folio. Which may cause 1708 * deadlock (e.g., for loop device). So, if mode != MIGRATE_ASYNC, the 1709 * length of the from list must be <= 1. 1710 */ 1711 static int migrate_pages_batch(struct list_head *from, 1712 new_folio_t get_new_folio, free_folio_t put_new_folio, 1713 unsigned long private, enum migrate_mode mode, int reason, 1714 struct list_head *ret_folios, struct list_head *split_folios, 1715 struct migrate_pages_stats *stats, int nr_pass) 1716 { 1717 int retry = 1; 1718 int thp_retry = 1; 1719 int nr_failed = 0; 1720 int nr_retry_pages = 0; 1721 int pass = 0; 1722 bool is_thp = false; 1723 bool is_large = false; 1724 struct folio *folio, *folio2, *dst = NULL, *dst2; 1725 int rc, rc_saved = 0, nr_pages; 1726 LIST_HEAD(unmap_folios); 1727 LIST_HEAD(dst_folios); 1728 bool nosplit = (reason == MR_NUMA_MISPLACED); 1729 1730 VM_WARN_ON_ONCE(mode != MIGRATE_ASYNC && 1731 !list_empty(from) && !list_is_singular(from)); 1732 1733 for (pass = 0; pass < nr_pass && retry; pass++) { 1734 retry = 0; 1735 thp_retry = 0; 1736 nr_retry_pages = 0; 1737 1738 list_for_each_entry_safe(folio, folio2, from, lru) { 1739 is_large = folio_test_large(folio); 1740 is_thp = is_large && folio_test_pmd_mappable(folio); 1741 nr_pages = folio_nr_pages(folio); 1742 1743 cond_resched(); 1744 1745 /* 1746 * The rare folio on the deferred split list should 1747 * be split now. It should not count as a failure: 1748 * but increment nr_failed because, without doing so, 1749 * migrate_pages() may report success with (split but 1750 * unmigrated) pages still on its fromlist; whereas it 1751 * always reports success when its fromlist is empty. 1752 * stats->nr_thp_failed should be increased too, 1753 * otherwise stats inconsistency will happen when 1754 * migrate_pages_batch is called via migrate_pages() 1755 * with MIGRATE_SYNC and MIGRATE_ASYNC. 1756 * 1757 * Only check it without removing it from the list. 1758 * Since the folio can be on deferred_split_scan() 1759 * local list and removing it can cause the local list 1760 * corruption. Folio split process below can handle it 1761 * with the help of folio_ref_freeze(). 1762 * 1763 * nr_pages > 2 is needed to avoid checking order-1 1764 * page cache folios. They exist, in contrast to 1765 * non-existent order-1 anonymous folios, and do not 1766 * use _deferred_list. 1767 */ 1768 if (nr_pages > 2 && 1769 !list_empty(&folio->_deferred_list) && 1770 folio_test_partially_mapped(folio)) { 1771 if (!try_split_folio(folio, split_folios, mode)) { 1772 nr_failed++; 1773 stats->nr_thp_failed += is_thp; 1774 stats->nr_thp_split += is_thp; 1775 stats->nr_split++; 1776 continue; 1777 } 1778 } 1779 1780 /* 1781 * Large folio migration might be unsupported or 1782 * the allocation might be failed so we should retry 1783 * on the same folio with the large folio split 1784 * to normal folios. 1785 * 1786 * Split folios are put in split_folios, and 1787 * we will migrate them after the rest of the 1788 * list is processed. 1789 */ 1790 if (!thp_migration_supported() && is_thp) { 1791 nr_failed++; 1792 stats->nr_thp_failed++; 1793 if (!try_split_folio(folio, split_folios, mode)) { 1794 stats->nr_thp_split++; 1795 stats->nr_split++; 1796 continue; 1797 } 1798 stats->nr_failed_pages += nr_pages; 1799 list_move_tail(&folio->lru, ret_folios); 1800 continue; 1801 } 1802 1803 rc = migrate_folio_unmap(get_new_folio, put_new_folio, 1804 private, folio, &dst, mode, reason, 1805 ret_folios); 1806 /* 1807 * The rules are: 1808 * Success: folio will be freed 1809 * Unmap: folio will be put on unmap_folios list, 1810 * dst folio put on dst_folios list 1811 * -EAGAIN: stay on the from list 1812 * -ENOMEM: stay on the from list 1813 * Other errno: put on ret_folios list 1814 */ 1815 switch(rc) { 1816 case -ENOMEM: 1817 /* 1818 * When memory is low, don't bother to try to migrate 1819 * other folios, move unmapped folios, then exit. 1820 */ 1821 nr_failed++; 1822 stats->nr_thp_failed += is_thp; 1823 /* Large folio NUMA faulting doesn't split to retry. */ 1824 if (is_large && !nosplit) { 1825 int ret = try_split_folio(folio, split_folios, mode); 1826 1827 if (!ret) { 1828 stats->nr_thp_split += is_thp; 1829 stats->nr_split++; 1830 break; 1831 } else if (reason == MR_LONGTERM_PIN && 1832 ret == -EAGAIN) { 1833 /* 1834 * Try again to split large folio to 1835 * mitigate the failure of longterm pinning. 1836 */ 1837 retry++; 1838 thp_retry += is_thp; 1839 nr_retry_pages += nr_pages; 1840 /* Undo duplicated failure counting. */ 1841 nr_failed--; 1842 stats->nr_thp_failed -= is_thp; 1843 break; 1844 } 1845 } 1846 1847 stats->nr_failed_pages += nr_pages + nr_retry_pages; 1848 /* nr_failed isn't updated for not used */ 1849 stats->nr_thp_failed += thp_retry; 1850 rc_saved = rc; 1851 if (list_empty(&unmap_folios)) 1852 goto out; 1853 else 1854 goto move; 1855 case -EAGAIN: 1856 retry++; 1857 thp_retry += is_thp; 1858 nr_retry_pages += nr_pages; 1859 break; 1860 case MIGRATEPAGE_SUCCESS: 1861 stats->nr_succeeded += nr_pages; 1862 stats->nr_thp_succeeded += is_thp; 1863 break; 1864 case MIGRATEPAGE_UNMAP: 1865 list_move_tail(&folio->lru, &unmap_folios); 1866 list_add_tail(&dst->lru, &dst_folios); 1867 break; 1868 default: 1869 /* 1870 * Permanent failure (-EBUSY, etc.): 1871 * unlike -EAGAIN case, the failed folio is 1872 * removed from migration folio list and not 1873 * retried in the next outer loop. 1874 */ 1875 nr_failed++; 1876 stats->nr_thp_failed += is_thp; 1877 stats->nr_failed_pages += nr_pages; 1878 break; 1879 } 1880 } 1881 } 1882 nr_failed += retry; 1883 stats->nr_thp_failed += thp_retry; 1884 stats->nr_failed_pages += nr_retry_pages; 1885 move: 1886 /* Flush TLBs for all unmapped folios */ 1887 try_to_unmap_flush(); 1888 1889 retry = 1; 1890 for (pass = 0; pass < nr_pass && retry; pass++) { 1891 retry = 0; 1892 thp_retry = 0; 1893 nr_retry_pages = 0; 1894 1895 dst = list_first_entry(&dst_folios, struct folio, lru); 1896 dst2 = list_next_entry(dst, lru); 1897 list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) { 1898 is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio); 1899 nr_pages = folio_nr_pages(folio); 1900 1901 cond_resched(); 1902 1903 rc = migrate_folio_move(put_new_folio, private, 1904 folio, dst, mode, 1905 reason, ret_folios); 1906 /* 1907 * The rules are: 1908 * Success: folio will be freed 1909 * -EAGAIN: stay on the unmap_folios list 1910 * Other errno: put on ret_folios list 1911 */ 1912 switch(rc) { 1913 case -EAGAIN: 1914 retry++; 1915 thp_retry += is_thp; 1916 nr_retry_pages += nr_pages; 1917 break; 1918 case MIGRATEPAGE_SUCCESS: 1919 stats->nr_succeeded += nr_pages; 1920 stats->nr_thp_succeeded += is_thp; 1921 break; 1922 default: 1923 nr_failed++; 1924 stats->nr_thp_failed += is_thp; 1925 stats->nr_failed_pages += nr_pages; 1926 break; 1927 } 1928 dst = dst2; 1929 dst2 = list_next_entry(dst, lru); 1930 } 1931 } 1932 nr_failed += retry; 1933 stats->nr_thp_failed += thp_retry; 1934 stats->nr_failed_pages += nr_retry_pages; 1935 1936 rc = rc_saved ? : nr_failed; 1937 out: 1938 /* Cleanup remaining folios */ 1939 dst = list_first_entry(&dst_folios, struct folio, lru); 1940 dst2 = list_next_entry(dst, lru); 1941 list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) { 1942 int old_page_state = 0; 1943 struct anon_vma *anon_vma = NULL; 1944 1945 __migrate_folio_extract(dst, &old_page_state, &anon_vma); 1946 migrate_folio_undo_src(folio, old_page_state & PAGE_WAS_MAPPED, 1947 anon_vma, true, ret_folios); 1948 list_del(&dst->lru); 1949 migrate_folio_undo_dst(dst, true, put_new_folio, private); 1950 dst = dst2; 1951 dst2 = list_next_entry(dst, lru); 1952 } 1953 1954 return rc; 1955 } 1956 1957 static int migrate_pages_sync(struct list_head *from, new_folio_t get_new_folio, 1958 free_folio_t put_new_folio, unsigned long private, 1959 enum migrate_mode mode, int reason, 1960 struct list_head *ret_folios, struct list_head *split_folios, 1961 struct migrate_pages_stats *stats) 1962 { 1963 int rc, nr_failed = 0; 1964 LIST_HEAD(folios); 1965 struct migrate_pages_stats astats; 1966 1967 memset(&astats, 0, sizeof(astats)); 1968 /* Try to migrate in batch with MIGRATE_ASYNC mode firstly */ 1969 rc = migrate_pages_batch(from, get_new_folio, put_new_folio, private, MIGRATE_ASYNC, 1970 reason, &folios, split_folios, &astats, 1971 NR_MAX_MIGRATE_ASYNC_RETRY); 1972 stats->nr_succeeded += astats.nr_succeeded; 1973 stats->nr_thp_succeeded += astats.nr_thp_succeeded; 1974 stats->nr_thp_split += astats.nr_thp_split; 1975 stats->nr_split += astats.nr_split; 1976 if (rc < 0) { 1977 stats->nr_failed_pages += astats.nr_failed_pages; 1978 stats->nr_thp_failed += astats.nr_thp_failed; 1979 list_splice_tail(&folios, ret_folios); 1980 return rc; 1981 } 1982 stats->nr_thp_failed += astats.nr_thp_split; 1983 /* 1984 * Do not count rc, as pages will be retried below. 1985 * Count nr_split only, since it includes nr_thp_split. 1986 */ 1987 nr_failed += astats.nr_split; 1988 /* 1989 * Fall back to migrate all failed folios one by one synchronously. All 1990 * failed folios except split THPs will be retried, so their failure 1991 * isn't counted 1992 */ 1993 list_splice_tail_init(&folios, from); 1994 while (!list_empty(from)) { 1995 list_move(from->next, &folios); 1996 rc = migrate_pages_batch(&folios, get_new_folio, put_new_folio, 1997 private, mode, reason, ret_folios, 1998 split_folios, stats, NR_MAX_MIGRATE_SYNC_RETRY); 1999 list_splice_tail_init(&folios, ret_folios); 2000 if (rc < 0) 2001 return rc; 2002 nr_failed += rc; 2003 } 2004 2005 return nr_failed; 2006 } 2007 2008 /* 2009 * migrate_pages - migrate the folios specified in a list, to the free folios 2010 * supplied as the target for the page migration 2011 * 2012 * @from: The list of folios to be migrated. 2013 * @get_new_folio: The function used to allocate free folios to be used 2014 * as the target of the folio migration. 2015 * @put_new_folio: The function used to free target folios if migration 2016 * fails, or NULL if no special handling is necessary. 2017 * @private: Private data to be passed on to get_new_folio() 2018 * @mode: The migration mode that specifies the constraints for 2019 * folio migration, if any. 2020 * @reason: The reason for folio migration. 2021 * @ret_succeeded: Set to the number of folios migrated successfully if 2022 * the caller passes a non-NULL pointer. 2023 * 2024 * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios 2025 * are movable any more because the list has become empty or no retryable folios 2026 * exist any more. It is caller's responsibility to call putback_movable_pages() 2027 * only if ret != 0. 2028 * 2029 * Returns the number of {normal folio, large folio, hugetlb} that were not 2030 * migrated, or an error code. The number of large folio splits will be 2031 * considered as the number of non-migrated large folio, no matter how many 2032 * split folios of the large folio are migrated successfully. 2033 */ 2034 int migrate_pages(struct list_head *from, new_folio_t get_new_folio, 2035 free_folio_t put_new_folio, unsigned long private, 2036 enum migrate_mode mode, int reason, unsigned int *ret_succeeded) 2037 { 2038 int rc, rc_gather; 2039 int nr_pages; 2040 struct folio *folio, *folio2; 2041 LIST_HEAD(folios); 2042 LIST_HEAD(ret_folios); 2043 LIST_HEAD(split_folios); 2044 struct migrate_pages_stats stats; 2045 2046 trace_mm_migrate_pages_start(mode, reason); 2047 2048 memset(&stats, 0, sizeof(stats)); 2049 2050 rc_gather = migrate_hugetlbs(from, get_new_folio, put_new_folio, private, 2051 mode, reason, &stats, &ret_folios); 2052 if (rc_gather < 0) 2053 goto out; 2054 2055 again: 2056 nr_pages = 0; 2057 list_for_each_entry_safe(folio, folio2, from, lru) { 2058 /* Retried hugetlb folios will be kept in list */ 2059 if (folio_test_hugetlb(folio)) { 2060 list_move_tail(&folio->lru, &ret_folios); 2061 continue; 2062 } 2063 2064 nr_pages += folio_nr_pages(folio); 2065 if (nr_pages >= NR_MAX_BATCHED_MIGRATION) 2066 break; 2067 } 2068 if (nr_pages >= NR_MAX_BATCHED_MIGRATION) 2069 list_cut_before(&folios, from, &folio2->lru); 2070 else 2071 list_splice_init(from, &folios); 2072 if (mode == MIGRATE_ASYNC) 2073 rc = migrate_pages_batch(&folios, get_new_folio, put_new_folio, 2074 private, mode, reason, &ret_folios, 2075 &split_folios, &stats, 2076 NR_MAX_MIGRATE_PAGES_RETRY); 2077 else 2078 rc = migrate_pages_sync(&folios, get_new_folio, put_new_folio, 2079 private, mode, reason, &ret_folios, 2080 &split_folios, &stats); 2081 list_splice_tail_init(&folios, &ret_folios); 2082 if (rc < 0) { 2083 rc_gather = rc; 2084 list_splice_tail(&split_folios, &ret_folios); 2085 goto out; 2086 } 2087 if (!list_empty(&split_folios)) { 2088 /* 2089 * Failure isn't counted since all split folios of a large folio 2090 * is counted as 1 failure already. And, we only try to migrate 2091 * with minimal effort, force MIGRATE_ASYNC mode and retry once. 2092 */ 2093 migrate_pages_batch(&split_folios, get_new_folio, 2094 put_new_folio, private, MIGRATE_ASYNC, reason, 2095 &ret_folios, NULL, &stats, 1); 2096 list_splice_tail_init(&split_folios, &ret_folios); 2097 } 2098 rc_gather += rc; 2099 if (!list_empty(from)) 2100 goto again; 2101 out: 2102 /* 2103 * Put the permanent failure folio back to migration list, they 2104 * will be put back to the right list by the caller. 2105 */ 2106 list_splice(&ret_folios, from); 2107 2108 /* 2109 * Return 0 in case all split folios of fail-to-migrate large folios 2110 * are migrated successfully. 2111 */ 2112 if (list_empty(from)) 2113 rc_gather = 0; 2114 2115 count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded); 2116 count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages); 2117 count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded); 2118 count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed); 2119 count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split); 2120 trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages, 2121 stats.nr_thp_succeeded, stats.nr_thp_failed, 2122 stats.nr_thp_split, stats.nr_split, mode, 2123 reason); 2124 2125 if (ret_succeeded) 2126 *ret_succeeded = stats.nr_succeeded; 2127 2128 return rc_gather; 2129 } 2130 2131 struct folio *alloc_migration_target(struct folio *src, unsigned long private) 2132 { 2133 struct migration_target_control *mtc; 2134 gfp_t gfp_mask; 2135 unsigned int order = 0; 2136 int nid; 2137 int zidx; 2138 2139 mtc = (struct migration_target_control *)private; 2140 gfp_mask = mtc->gfp_mask; 2141 nid = mtc->nid; 2142 if (nid == NUMA_NO_NODE) 2143 nid = folio_nid(src); 2144 2145 if (folio_test_hugetlb(src)) { 2146 struct hstate *h = folio_hstate(src); 2147 2148 gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); 2149 return alloc_hugetlb_folio_nodemask(h, nid, 2150 mtc->nmask, gfp_mask, 2151 htlb_allow_alloc_fallback(mtc->reason)); 2152 } 2153 2154 if (folio_test_large(src)) { 2155 /* 2156 * clear __GFP_RECLAIM to make the migration callback 2157 * consistent with regular THP allocations. 2158 */ 2159 gfp_mask &= ~__GFP_RECLAIM; 2160 gfp_mask |= GFP_TRANSHUGE; 2161 order = folio_order(src); 2162 } 2163 zidx = zone_idx(folio_zone(src)); 2164 if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE) 2165 gfp_mask |= __GFP_HIGHMEM; 2166 2167 return __folio_alloc(gfp_mask, order, nid, mtc->nmask); 2168 } 2169 2170 #ifdef CONFIG_NUMA 2171 2172 static int store_status(int __user *status, int start, int value, int nr) 2173 { 2174 while (nr-- > 0) { 2175 if (put_user(value, status + start)) 2176 return -EFAULT; 2177 start++; 2178 } 2179 2180 return 0; 2181 } 2182 2183 static int do_move_pages_to_node(struct list_head *pagelist, int node) 2184 { 2185 int err; 2186 struct migration_target_control mtc = { 2187 .nid = node, 2188 .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 2189 .reason = MR_SYSCALL, 2190 }; 2191 2192 err = migrate_pages(pagelist, alloc_migration_target, NULL, 2193 (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL); 2194 if (err) 2195 putback_movable_pages(pagelist); 2196 return err; 2197 } 2198 2199 static int __add_folio_for_migration(struct folio *folio, int node, 2200 struct list_head *pagelist, bool migrate_all) 2201 { 2202 if (is_zero_folio(folio) || is_huge_zero_folio(folio)) 2203 return -EFAULT; 2204 2205 if (folio_is_zone_device(folio)) 2206 return -ENOENT; 2207 2208 if (folio_nid(folio) == node) 2209 return 0; 2210 2211 if (folio_likely_mapped_shared(folio) && !migrate_all) 2212 return -EACCES; 2213 2214 if (folio_test_hugetlb(folio)) { 2215 if (isolate_hugetlb(folio, pagelist)) 2216 return 1; 2217 } else if (folio_isolate_lru(folio)) { 2218 list_add_tail(&folio->lru, pagelist); 2219 node_stat_mod_folio(folio, 2220 NR_ISOLATED_ANON + folio_is_file_lru(folio), 2221 folio_nr_pages(folio)); 2222 return 1; 2223 } 2224 return -EBUSY; 2225 } 2226 2227 /* 2228 * Resolves the given address to a struct folio, isolates it from the LRU and 2229 * puts it to the given pagelist. 2230 * Returns: 2231 * errno - if the folio cannot be found/isolated 2232 * 0 - when it doesn't have to be migrated because it is already on the 2233 * target node 2234 * 1 - when it has been queued 2235 */ 2236 static int add_folio_for_migration(struct mm_struct *mm, const void __user *p, 2237 int node, struct list_head *pagelist, bool migrate_all) 2238 { 2239 struct vm_area_struct *vma; 2240 struct folio_walk fw; 2241 struct folio *folio; 2242 unsigned long addr; 2243 int err = -EFAULT; 2244 2245 mmap_read_lock(mm); 2246 addr = (unsigned long)untagged_addr_remote(mm, p); 2247 2248 vma = vma_lookup(mm, addr); 2249 if (vma && vma_migratable(vma)) { 2250 folio = folio_walk_start(&fw, vma, addr, FW_ZEROPAGE); 2251 if (folio) { 2252 err = __add_folio_for_migration(folio, node, pagelist, 2253 migrate_all); 2254 folio_walk_end(&fw, vma); 2255 } else { 2256 err = -ENOENT; 2257 } 2258 } 2259 mmap_read_unlock(mm); 2260 return err; 2261 } 2262 2263 static int move_pages_and_store_status(int node, 2264 struct list_head *pagelist, int __user *status, 2265 int start, int i, unsigned long nr_pages) 2266 { 2267 int err; 2268 2269 if (list_empty(pagelist)) 2270 return 0; 2271 2272 err = do_move_pages_to_node(pagelist, node); 2273 if (err) { 2274 /* 2275 * Positive err means the number of failed 2276 * pages to migrate. Since we are going to 2277 * abort and return the number of non-migrated 2278 * pages, so need to include the rest of the 2279 * nr_pages that have not been attempted as 2280 * well. 2281 */ 2282 if (err > 0) 2283 err += nr_pages - i; 2284 return err; 2285 } 2286 return store_status(status, start, node, i - start); 2287 } 2288 2289 /* 2290 * Migrate an array of page address onto an array of nodes and fill 2291 * the corresponding array of status. 2292 */ 2293 static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, 2294 unsigned long nr_pages, 2295 const void __user * __user *pages, 2296 const int __user *nodes, 2297 int __user *status, int flags) 2298 { 2299 compat_uptr_t __user *compat_pages = (void __user *)pages; 2300 int current_node = NUMA_NO_NODE; 2301 LIST_HEAD(pagelist); 2302 int start, i; 2303 int err = 0, err1; 2304 2305 lru_cache_disable(); 2306 2307 for (i = start = 0; i < nr_pages; i++) { 2308 const void __user *p; 2309 int node; 2310 2311 err = -EFAULT; 2312 if (in_compat_syscall()) { 2313 compat_uptr_t cp; 2314 2315 if (get_user(cp, compat_pages + i)) 2316 goto out_flush; 2317 2318 p = compat_ptr(cp); 2319 } else { 2320 if (get_user(p, pages + i)) 2321 goto out_flush; 2322 } 2323 if (get_user(node, nodes + i)) 2324 goto out_flush; 2325 2326 err = -ENODEV; 2327 if (node < 0 || node >= MAX_NUMNODES) 2328 goto out_flush; 2329 if (!node_state(node, N_MEMORY)) 2330 goto out_flush; 2331 2332 err = -EACCES; 2333 if (!node_isset(node, task_nodes)) 2334 goto out_flush; 2335 2336 if (current_node == NUMA_NO_NODE) { 2337 current_node = node; 2338 start = i; 2339 } else if (node != current_node) { 2340 err = move_pages_and_store_status(current_node, 2341 &pagelist, status, start, i, nr_pages); 2342 if (err) 2343 goto out; 2344 start = i; 2345 current_node = node; 2346 } 2347 2348 /* 2349 * Errors in the page lookup or isolation are not fatal and we simply 2350 * report them via status 2351 */ 2352 err = add_folio_for_migration(mm, p, current_node, &pagelist, 2353 flags & MPOL_MF_MOVE_ALL); 2354 2355 if (err > 0) { 2356 /* The page is successfully queued for migration */ 2357 continue; 2358 } 2359 2360 /* 2361 * The move_pages() man page does not have an -EEXIST choice, so 2362 * use -EFAULT instead. 2363 */ 2364 if (err == -EEXIST) 2365 err = -EFAULT; 2366 2367 /* 2368 * If the page is already on the target node (!err), store the 2369 * node, otherwise, store the err. 2370 */ 2371 err = store_status(status, i, err ? : current_node, 1); 2372 if (err) 2373 goto out_flush; 2374 2375 err = move_pages_and_store_status(current_node, &pagelist, 2376 status, start, i, nr_pages); 2377 if (err) { 2378 /* We have accounted for page i */ 2379 if (err > 0) 2380 err--; 2381 goto out; 2382 } 2383 current_node = NUMA_NO_NODE; 2384 } 2385 out_flush: 2386 /* Make sure we do not overwrite the existing error */ 2387 err1 = move_pages_and_store_status(current_node, &pagelist, 2388 status, start, i, nr_pages); 2389 if (err >= 0) 2390 err = err1; 2391 out: 2392 lru_cache_enable(); 2393 return err; 2394 } 2395 2396 /* 2397 * Determine the nodes of an array of pages and store it in an array of status. 2398 */ 2399 static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, 2400 const void __user **pages, int *status) 2401 { 2402 unsigned long i; 2403 2404 mmap_read_lock(mm); 2405 2406 for (i = 0; i < nr_pages; i++) { 2407 unsigned long addr = (unsigned long)(*pages); 2408 struct vm_area_struct *vma; 2409 struct folio_walk fw; 2410 struct folio *folio; 2411 int err = -EFAULT; 2412 2413 vma = vma_lookup(mm, addr); 2414 if (!vma) 2415 goto set_status; 2416 2417 folio = folio_walk_start(&fw, vma, addr, FW_ZEROPAGE); 2418 if (folio) { 2419 if (is_zero_folio(folio) || is_huge_zero_folio(folio)) 2420 err = -EFAULT; 2421 else if (folio_is_zone_device(folio)) 2422 err = -ENOENT; 2423 else 2424 err = folio_nid(folio); 2425 folio_walk_end(&fw, vma); 2426 } else { 2427 err = -ENOENT; 2428 } 2429 set_status: 2430 *status = err; 2431 2432 pages++; 2433 status++; 2434 } 2435 2436 mmap_read_unlock(mm); 2437 } 2438 2439 static int get_compat_pages_array(const void __user *chunk_pages[], 2440 const void __user * __user *pages, 2441 unsigned long chunk_nr) 2442 { 2443 compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages; 2444 compat_uptr_t p; 2445 int i; 2446 2447 for (i = 0; i < chunk_nr; i++) { 2448 if (get_user(p, pages32 + i)) 2449 return -EFAULT; 2450 chunk_pages[i] = compat_ptr(p); 2451 } 2452 2453 return 0; 2454 } 2455 2456 /* 2457 * Determine the nodes of a user array of pages and store it in 2458 * a user array of status. 2459 */ 2460 static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, 2461 const void __user * __user *pages, 2462 int __user *status) 2463 { 2464 #define DO_PAGES_STAT_CHUNK_NR 16UL 2465 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; 2466 int chunk_status[DO_PAGES_STAT_CHUNK_NR]; 2467 2468 while (nr_pages) { 2469 unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR); 2470 2471 if (in_compat_syscall()) { 2472 if (get_compat_pages_array(chunk_pages, pages, 2473 chunk_nr)) 2474 break; 2475 } else { 2476 if (copy_from_user(chunk_pages, pages, 2477 chunk_nr * sizeof(*chunk_pages))) 2478 break; 2479 } 2480 2481 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); 2482 2483 if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) 2484 break; 2485 2486 pages += chunk_nr; 2487 status += chunk_nr; 2488 nr_pages -= chunk_nr; 2489 } 2490 return nr_pages ? -EFAULT : 0; 2491 } 2492 2493 static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes) 2494 { 2495 struct task_struct *task; 2496 struct mm_struct *mm; 2497 2498 /* 2499 * There is no need to check if current process has the right to modify 2500 * the specified process when they are same. 2501 */ 2502 if (!pid) { 2503 mmget(current->mm); 2504 *mem_nodes = cpuset_mems_allowed(current); 2505 return current->mm; 2506 } 2507 2508 /* Find the mm_struct */ 2509 rcu_read_lock(); 2510 task = find_task_by_vpid(pid); 2511 if (!task) { 2512 rcu_read_unlock(); 2513 return ERR_PTR(-ESRCH); 2514 } 2515 get_task_struct(task); 2516 2517 /* 2518 * Check if this process has the right to modify the specified 2519 * process. Use the regular "ptrace_may_access()" checks. 2520 */ 2521 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { 2522 rcu_read_unlock(); 2523 mm = ERR_PTR(-EPERM); 2524 goto out; 2525 } 2526 rcu_read_unlock(); 2527 2528 mm = ERR_PTR(security_task_movememory(task)); 2529 if (IS_ERR(mm)) 2530 goto out; 2531 *mem_nodes = cpuset_mems_allowed(task); 2532 mm = get_task_mm(task); 2533 out: 2534 put_task_struct(task); 2535 if (!mm) 2536 mm = ERR_PTR(-EINVAL); 2537 return mm; 2538 } 2539 2540 /* 2541 * Move a list of pages in the address space of the currently executing 2542 * process. 2543 */ 2544 static int kernel_move_pages(pid_t pid, unsigned long nr_pages, 2545 const void __user * __user *pages, 2546 const int __user *nodes, 2547 int __user *status, int flags) 2548 { 2549 struct mm_struct *mm; 2550 int err; 2551 nodemask_t task_nodes; 2552 2553 /* Check flags */ 2554 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) 2555 return -EINVAL; 2556 2557 if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) 2558 return -EPERM; 2559 2560 mm = find_mm_struct(pid, &task_nodes); 2561 if (IS_ERR(mm)) 2562 return PTR_ERR(mm); 2563 2564 if (nodes) 2565 err = do_pages_move(mm, task_nodes, nr_pages, pages, 2566 nodes, status, flags); 2567 else 2568 err = do_pages_stat(mm, nr_pages, pages, status); 2569 2570 mmput(mm); 2571 return err; 2572 } 2573 2574 SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, 2575 const void __user * __user *, pages, 2576 const int __user *, nodes, 2577 int __user *, status, int, flags) 2578 { 2579 return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags); 2580 } 2581 2582 #ifdef CONFIG_NUMA_BALANCING 2583 /* 2584 * Returns true if this is a safe migration target node for misplaced NUMA 2585 * pages. Currently it only checks the watermarks which is crude. 2586 */ 2587 static bool migrate_balanced_pgdat(struct pglist_data *pgdat, 2588 unsigned long nr_migrate_pages) 2589 { 2590 int z; 2591 2592 for (z = pgdat->nr_zones - 1; z >= 0; z--) { 2593 struct zone *zone = pgdat->node_zones + z; 2594 2595 if (!managed_zone(zone)) 2596 continue; 2597 2598 /* Avoid waking kswapd by allocating pages_to_migrate pages. */ 2599 if (!zone_watermark_ok(zone, 0, 2600 high_wmark_pages(zone) + 2601 nr_migrate_pages, 2602 ZONE_MOVABLE, ALLOC_CMA)) 2603 continue; 2604 return true; 2605 } 2606 return false; 2607 } 2608 2609 static struct folio *alloc_misplaced_dst_folio(struct folio *src, 2610 unsigned long data) 2611 { 2612 int nid = (int) data; 2613 int order = folio_order(src); 2614 gfp_t gfp = __GFP_THISNODE; 2615 2616 if (order > 0) 2617 gfp |= GFP_TRANSHUGE_LIGHT; 2618 else { 2619 gfp |= GFP_HIGHUSER_MOVABLE | __GFP_NOMEMALLOC | __GFP_NORETRY | 2620 __GFP_NOWARN; 2621 gfp &= ~__GFP_RECLAIM; 2622 } 2623 return __folio_alloc_node(gfp, order, nid); 2624 } 2625 2626 /* 2627 * Prepare for calling migrate_misplaced_folio() by isolating the folio if 2628 * permitted. Must be called with the PTL still held. 2629 */ 2630 int migrate_misplaced_folio_prepare(struct folio *folio, 2631 struct vm_area_struct *vma, int node) 2632 { 2633 int nr_pages = folio_nr_pages(folio); 2634 pg_data_t *pgdat = NODE_DATA(node); 2635 2636 if (folio_is_file_lru(folio)) { 2637 /* 2638 * Do not migrate file folios that are mapped in multiple 2639 * processes with execute permissions as they are probably 2640 * shared libraries. 2641 * 2642 * See folio_likely_mapped_shared() on possible imprecision 2643 * when we cannot easily detect if a folio is shared. 2644 */ 2645 if ((vma->vm_flags & VM_EXEC) && 2646 folio_likely_mapped_shared(folio)) 2647 return -EACCES; 2648 2649 /* 2650 * Do not migrate dirty folios as not all filesystems can move 2651 * dirty folios in MIGRATE_ASYNC mode which is a waste of 2652 * cycles. 2653 */ 2654 if (folio_test_dirty(folio)) 2655 return -EAGAIN; 2656 } 2657 2658 /* Avoid migrating to a node that is nearly full */ 2659 if (!migrate_balanced_pgdat(pgdat, nr_pages)) { 2660 int z; 2661 2662 if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)) 2663 return -EAGAIN; 2664 for (z = pgdat->nr_zones - 1; z >= 0; z--) { 2665 if (managed_zone(pgdat->node_zones + z)) 2666 break; 2667 } 2668 2669 /* 2670 * If there are no managed zones, it should not proceed 2671 * further. 2672 */ 2673 if (z < 0) 2674 return -EAGAIN; 2675 2676 wakeup_kswapd(pgdat->node_zones + z, 0, 2677 folio_order(folio), ZONE_MOVABLE); 2678 return -EAGAIN; 2679 } 2680 2681 if (!folio_isolate_lru(folio)) 2682 return -EAGAIN; 2683 2684 node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), 2685 nr_pages); 2686 return 0; 2687 } 2688 2689 /* 2690 * Attempt to migrate a misplaced folio to the specified destination 2691 * node. Caller is expected to have isolated the folio by calling 2692 * migrate_misplaced_folio_prepare(), which will result in an 2693 * elevated reference count on the folio. This function will un-isolate the 2694 * folio, dereferencing the folio before returning. 2695 */ 2696 int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, 2697 int node) 2698 { 2699 pg_data_t *pgdat = NODE_DATA(node); 2700 int nr_remaining; 2701 unsigned int nr_succeeded; 2702 LIST_HEAD(migratepages); 2703 struct mem_cgroup *memcg = get_mem_cgroup_from_folio(folio); 2704 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); 2705 2706 list_add(&folio->lru, &migratepages); 2707 nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_folio, 2708 NULL, node, MIGRATE_ASYNC, 2709 MR_NUMA_MISPLACED, &nr_succeeded); 2710 if (nr_remaining && !list_empty(&migratepages)) 2711 putback_movable_pages(&migratepages); 2712 if (nr_succeeded) { 2713 count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded); 2714 count_memcg_events(memcg, NUMA_PAGE_MIGRATE, nr_succeeded); 2715 if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) 2716 && !node_is_toptier(folio_nid(folio)) 2717 && node_is_toptier(node)) 2718 mod_lruvec_state(lruvec, PGPROMOTE_SUCCESS, nr_succeeded); 2719 } 2720 mem_cgroup_put(memcg); 2721 BUG_ON(!list_empty(&migratepages)); 2722 return nr_remaining ? -EAGAIN : 0; 2723 } 2724 #endif /* CONFIG_NUMA_BALANCING */ 2725 #endif /* CONFIG_NUMA */ 2726