khugepaged.c (b7d349c741293b694c552593dbd7d38ea7eb7143) khugepaged.c (b93b016313b3ba8003c3b8bb71f569af91f19fc7)
1// SPDX-License-Identifier: GPL-2.0
2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3
4#include <linux/mm.h>
5#include <linux/sched.h>
6#include <linux/sched/mm.h>
7#include <linux/sched/coredump.h>
8#include <linux/mmu_notifier.h>

--- 1330 unchanged lines hidden (view full) ---

1339 /*
1340 * At this point the new_page is 'frozen' (page_count() is zero), locked
1341 * and not up-to-date. It's safe to insert it into radix tree, because
1342 * nobody would be able to map it or use it in other way until we
1343 * unfreeze it.
1344 */
1345
1346 index = start;
1// SPDX-License-Identifier: GPL-2.0
2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3
4#include <linux/mm.h>
5#include <linux/sched.h>
6#include <linux/sched/mm.h>
7#include <linux/sched/coredump.h>
8#include <linux/mmu_notifier.h>

--- 1330 unchanged lines hidden (view full) ---

1339 /*
1340 * At this point the new_page is 'frozen' (page_count() is zero), locked
1341 * and not up-to-date. It's safe to insert it into radix tree, because
1342 * nobody would be able to map it or use it in other way until we
1343 * unfreeze it.
1344 */
1345
1346 index = start;
1347 spin_lock_irq(&mapping->tree_lock);
1348 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
1347 xa_lock_irq(&mapping->i_pages);
1348 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
1349 int n = min(iter.index, end) - index;
1350
1351 /*
1352 * Handle holes in the radix tree: charge it from shmem and
1353 * insert relevant subpage of new_page into the radix-tree.
1354 */
1355 if (n && !shmem_charge(mapping->host, n)) {
1356 result = SCAN_FAIL;
1357 break;
1358 }
1359 nr_none += n;
1360 for (; index < min(iter.index, end); index++) {
1349 int n = min(iter.index, end) - index;
1350
1351 /*
1352 * Handle holes in the radix tree: charge it from shmem and
1353 * insert relevant subpage of new_page into the radix-tree.
1354 */
1355 if (n && !shmem_charge(mapping->host, n)) {
1356 result = SCAN_FAIL;
1357 break;
1358 }
1359 nr_none += n;
1360 for (; index < min(iter.index, end); index++) {
1361 radix_tree_insert(&mapping->page_tree, index,
1361 radix_tree_insert(&mapping->i_pages, index,
1362 new_page + (index % HPAGE_PMD_NR));
1363 }
1364
1365 /* We are done. */
1366 if (index >= end)
1367 break;
1368
1369 page = radix_tree_deref_slot_protected(slot,
1362 new_page + (index % HPAGE_PMD_NR));
1363 }
1364
1365 /* We are done. */
1366 if (index >= end)
1367 break;
1368
1369 page = radix_tree_deref_slot_protected(slot,
1370 &mapping->tree_lock);
1370 &mapping->i_pages.xa_lock);
1371 if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) {
1371 if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) {
1372 spin_unlock_irq(&mapping->tree_lock);
1372 xa_unlock_irq(&mapping->i_pages);
1373 /* swap in or instantiate fallocated page */
1374 if (shmem_getpage(mapping->host, index, &page,
1375 SGP_NOHUGE)) {
1376 result = SCAN_FAIL;
1377 goto tree_unlocked;
1378 }
1373 /* swap in or instantiate fallocated page */
1374 if (shmem_getpage(mapping->host, index, &page,
1375 SGP_NOHUGE)) {
1376 result = SCAN_FAIL;
1377 goto tree_unlocked;
1378 }
1379 spin_lock_irq(&mapping->tree_lock);
1379 xa_lock_irq(&mapping->i_pages);
1380 } else if (trylock_page(page)) {
1381 get_page(page);
1382 } else {
1383 result = SCAN_PAGE_LOCK;
1384 break;
1385 }
1386
1387 /*
1380 } else if (trylock_page(page)) {
1381 get_page(page);
1382 } else {
1383 result = SCAN_PAGE_LOCK;
1384 break;
1385 }
1386
1387 /*
1388 * The page must be locked, so we can drop the tree_lock
1388 * The page must be locked, so we can drop the i_pages lock
1389 * without racing with truncate.
1390 */
1391 VM_BUG_ON_PAGE(!PageLocked(page), page);
1392 VM_BUG_ON_PAGE(!PageUptodate(page), page);
1393 VM_BUG_ON_PAGE(PageTransCompound(page), page);
1394
1395 if (page_mapping(page) != mapping) {
1396 result = SCAN_TRUNCATED;
1397 goto out_unlock;
1398 }
1389 * without racing with truncate.
1390 */
1391 VM_BUG_ON_PAGE(!PageLocked(page), page);
1392 VM_BUG_ON_PAGE(!PageUptodate(page), page);
1393 VM_BUG_ON_PAGE(PageTransCompound(page), page);
1394
1395 if (page_mapping(page) != mapping) {
1396 result = SCAN_TRUNCATED;
1397 goto out_unlock;
1398 }
1399 spin_unlock_irq(&mapping->tree_lock);
1399 xa_unlock_irq(&mapping->i_pages);
1400
1401 if (isolate_lru_page(page)) {
1402 result = SCAN_DEL_PAGE_LRU;
1403 goto out_isolate_failed;
1404 }
1405
1406 if (page_mapped(page))
1407 unmap_mapping_pages(mapping, index, 1, false);
1408
1400
1401 if (isolate_lru_page(page)) {
1402 result = SCAN_DEL_PAGE_LRU;
1403 goto out_isolate_failed;
1404 }
1405
1406 if (page_mapped(page))
1407 unmap_mapping_pages(mapping, index, 1, false);
1408
1409 spin_lock_irq(&mapping->tree_lock);
1409 xa_lock_irq(&mapping->i_pages);
1410
1410
1411 slot = radix_tree_lookup_slot(&mapping->page_tree, index);
1411 slot = radix_tree_lookup_slot(&mapping->i_pages, index);
1412 VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
1412 VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
1413 &mapping->tree_lock), page);
1413 &mapping->i_pages.xa_lock), page);
1414 VM_BUG_ON_PAGE(page_mapped(page), page);
1415
1416 /*
1417 * The page is expected to have page_count() == 3:
1418 * - we hold a pin on it;
1419 * - one reference from radix tree;
1420 * - one from isolate_lru_page;
1421 */

--- 4 unchanged lines hidden (view full) ---

1426
1427 /*
1428 * Add the page to the list to be able to undo the collapse if
1429 * something goes wrong.
1430 */
1431 list_add_tail(&page->lru, &pagelist);
1432
1433 /* Finally, replace with the new page. */
1414 VM_BUG_ON_PAGE(page_mapped(page), page);
1415
1416 /*
1417 * The page is expected to have page_count() == 3:
1418 * - we hold a pin on it;
1419 * - one reference from radix tree;
1420 * - one from isolate_lru_page;
1421 */

--- 4 unchanged lines hidden (view full) ---

1426
1427 /*
1428 * Add the page to the list to be able to undo the collapse if
1429 * something goes wrong.
1430 */
1431 list_add_tail(&page->lru, &pagelist);
1432
1433 /* Finally, replace with the new page. */
1434 radix_tree_replace_slot(&mapping->page_tree, slot,
1434 radix_tree_replace_slot(&mapping->i_pages, slot,
1435 new_page + (index % HPAGE_PMD_NR));
1436
1437 slot = radix_tree_iter_resume(slot, &iter);
1438 index++;
1439 continue;
1440out_lru:
1435 new_page + (index % HPAGE_PMD_NR));
1436
1437 slot = radix_tree_iter_resume(slot, &iter);
1438 index++;
1439 continue;
1440out_lru:
1441 spin_unlock_irq(&mapping->tree_lock);
1441 xa_unlock_irq(&mapping->i_pages);
1442 putback_lru_page(page);
1443out_isolate_failed:
1444 unlock_page(page);
1445 put_page(page);
1446 goto tree_unlocked;
1447out_unlock:
1448 unlock_page(page);
1449 put_page(page);

--- 9 unchanged lines hidden (view full) ---

1459 int n = end - index;
1460
1461 if (!shmem_charge(mapping->host, n)) {
1462 result = SCAN_FAIL;
1463 goto tree_locked;
1464 }
1465
1466 for (; index < end; index++) {
1442 putback_lru_page(page);
1443out_isolate_failed:
1444 unlock_page(page);
1445 put_page(page);
1446 goto tree_unlocked;
1447out_unlock:
1448 unlock_page(page);
1449 put_page(page);

--- 9 unchanged lines hidden (view full) ---

1459 int n = end - index;
1460
1461 if (!shmem_charge(mapping->host, n)) {
1462 result = SCAN_FAIL;
1463 goto tree_locked;
1464 }
1465
1466 for (; index < end; index++) {
1467 radix_tree_insert(&mapping->page_tree, index,
1467 radix_tree_insert(&mapping->i_pages, index,
1468 new_page + (index % HPAGE_PMD_NR));
1469 }
1470 nr_none += n;
1471 }
1472
1473tree_locked:
1468 new_page + (index % HPAGE_PMD_NR));
1469 }
1470 nr_none += n;
1471 }
1472
1473tree_locked:
1474 spin_unlock_irq(&mapping->tree_lock);
1474 xa_unlock_irq(&mapping->i_pages);
1475tree_unlocked:
1476
1477 if (result == SCAN_SUCCEED) {
1478 unsigned long flags;
1479 struct zone *zone = page_zone(new_page);
1480
1481 /*
1482 * Replacing old pages with new one has succeeded, now we need to

--- 32 unchanged lines hidden (view full) ---

1515 mem_cgroup_commit_charge(new_page, memcg, false, true);
1516 lru_cache_add_anon(new_page);
1517 unlock_page(new_page);
1518
1519 *hpage = NULL;
1520 } else {
1521 /* Something went wrong: rollback changes to the radix-tree */
1522 shmem_uncharge(mapping->host, nr_none);
1475tree_unlocked:
1476
1477 if (result == SCAN_SUCCEED) {
1478 unsigned long flags;
1479 struct zone *zone = page_zone(new_page);
1480
1481 /*
1482 * Replacing old pages with new one has succeeded, now we need to

--- 32 unchanged lines hidden (view full) ---

1515 mem_cgroup_commit_charge(new_page, memcg, false, true);
1516 lru_cache_add_anon(new_page);
1517 unlock_page(new_page);
1518
1519 *hpage = NULL;
1520 } else {
1521 /* Something went wrong: rollback changes to the radix-tree */
1522 shmem_uncharge(mapping->host, nr_none);
1523 spin_lock_irq(&mapping->tree_lock);
1524 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
1525 start) {
1523 xa_lock_irq(&mapping->i_pages);
1524 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
1526 if (iter.index >= end)
1527 break;
1528 page = list_first_entry_or_null(&pagelist,
1529 struct page, lru);
1530 if (!page || iter.index < page->index) {
1531 if (!nr_none)
1532 break;
1533 nr_none--;
1534 /* Put holes back where they were */
1525 if (iter.index >= end)
1526 break;
1527 page = list_first_entry_or_null(&pagelist,
1528 struct page, lru);
1529 if (!page || iter.index < page->index) {
1530 if (!nr_none)
1531 break;
1532 nr_none--;
1533 /* Put holes back where they were */
1535 radix_tree_delete(&mapping->page_tree,
1536 iter.index);
1534 radix_tree_delete(&mapping->i_pages, iter.index);
1537 continue;
1538 }
1539
1540 VM_BUG_ON_PAGE(page->index != iter.index, page);
1541
1542 /* Unfreeze the page. */
1543 list_del(&page->lru);
1544 page_ref_unfreeze(page, 2);
1535 continue;
1536 }
1537
1538 VM_BUG_ON_PAGE(page->index != iter.index, page);
1539
1540 /* Unfreeze the page. */
1541 list_del(&page->lru);
1542 page_ref_unfreeze(page, 2);
1545 radix_tree_replace_slot(&mapping->page_tree,
1546 slot, page);
1543 radix_tree_replace_slot(&mapping->i_pages, slot, page);
1547 slot = radix_tree_iter_resume(slot, &iter);
1544 slot = radix_tree_iter_resume(slot, &iter);
1548 spin_unlock_irq(&mapping->tree_lock);
1545 xa_unlock_irq(&mapping->i_pages);
1549 putback_lru_page(page);
1550 unlock_page(page);
1546 putback_lru_page(page);
1547 unlock_page(page);
1551 spin_lock_irq(&mapping->tree_lock);
1548 xa_lock_irq(&mapping->i_pages);
1552 }
1553 VM_BUG_ON(nr_none);
1549 }
1550 VM_BUG_ON(nr_none);
1554 spin_unlock_irq(&mapping->tree_lock);
1551 xa_unlock_irq(&mapping->i_pages);
1555
1556 /* Unfreeze new_page, the caller will take care of freeing it */
1557 page_ref_unfreeze(new_page, 1);
1558 mem_cgroup_cancel_charge(new_page, memcg, true);
1559 unlock_page(new_page);
1560 new_page->mapping = NULL;
1561 }
1562out:

--- 11 unchanged lines hidden (view full) ---

1574 int present, swap;
1575 int node = NUMA_NO_NODE;
1576 int result = SCAN_SUCCEED;
1577
1578 present = 0;
1579 swap = 0;
1580 memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
1581 rcu_read_lock();
1552
1553 /* Unfreeze new_page, the caller will take care of freeing it */
1554 page_ref_unfreeze(new_page, 1);
1555 mem_cgroup_cancel_charge(new_page, memcg, true);
1556 unlock_page(new_page);
1557 new_page->mapping = NULL;
1558 }
1559out:

--- 11 unchanged lines hidden (view full) ---

1571 int present, swap;
1572 int node = NUMA_NO_NODE;
1573 int result = SCAN_SUCCEED;
1574
1575 present = 0;
1576 swap = 0;
1577 memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
1578 rcu_read_lock();
1582 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
1579 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
1583 if (iter.index >= start + HPAGE_PMD_NR)
1584 break;
1585
1586 page = radix_tree_deref_slot(slot);
1587 if (radix_tree_deref_retry(page)) {
1588 slot = radix_tree_iter_retry(&iter);
1589 continue;
1590 }

--- 359 unchanged lines hidden ---
1580 if (iter.index >= start + HPAGE_PMD_NR)
1581 break;
1582
1583 page = radix_tree_deref_slot(slot);
1584 if (radix_tree_deref_retry(page)) {
1585 slot = radix_tree_iter_retry(&iter);
1586 continue;
1587 }

--- 359 unchanged lines hidden ---