// SPDX-License-Identifier: GPL-2.0
/*
 * Helper functions for KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>
#include <asm/pgtable.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry))
		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep)) {
		preempt_disable();
		pgste = pgste_get_lock(ptep);

		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
		pte_clear(mm, vmaddr, ptep);

		pgste_set_unlock(ptep, pgste);
		preempt_enable();
	}
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table,
 * racing with walk_page_range_vma() and causing pte_offset_map_lock()
 * to fail, it will never insert a page table containing empty zero
 * pages once mm_forbids_zeropage(mm), i.e.
 * mm->context.allow_cow_sharing, is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
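
/*
 * Illustrative sketch only (not part of this file): a caller such as the
 * KVM s390 code is expected to hold the mmap write lock around
 * gmap_helper_disable_cow_sharing(), roughly as follows. The function
 * name below is hypothetical.
 *
 *	static int disable_cow_sharing_for_current_mm(void)
 *	{
 *		struct mm_struct *mm = current->mm;
 *		int rc;
 *
 *		mmap_write_lock(mm);
 *		rc = gmap_helper_disable_cow_sharing();
 *		mmap_write_unlock(mm);
 *		return rc;
 *	}
 */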