// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}

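/*
 * map_pte() has three outcomes that its caller relies on: it returns
 * true with pvmw->ptl locked when the pte plausibly matches; it returns
 * false with pvmw->pte set when this pte cannot match, so the caller
 * can step forward to the next pte; and it returns false with
 * pvmw->pte == NULL when the page table was freed or replaced under us,
 * in which case the caller must restart the walk.
 */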
static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
{
	pte_t ptent;

	if (pvmw->flags & PVMW_SYNC) {
		/* Use the stricter lookup */
		pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd,
						pvmw->address, &pvmw->ptl);
		*ptlp = pvmw->ptl;
		return !!pvmw->pte;
	}

	/*
	 * It is important to return the ptl corresponding to pte,
	 * in case *pvmw->pmd changes underneath us; so we need to
	 * return it even when choosing not to lock, in case caller
	 * proceeds to loop over next ptes, and finds a match later.
	 * Though, in most cases, page lock already protects this.
	 */
	pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd,
					  pvmw->address, ptlp);
	if (!pvmw->pte)
		return false;

	ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		if (!is_swap_pte(ptent))
			return false;
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;
		/*
		 * Handle un-addressable ZONE_DEVICE memory.
		 *
		 * We get here when we are trying to unmap a private
		 * device page from the process address space. Such a
		 * page is not CPU accessible and thus is mapped as
		 * a special swap entry; nonetheless it still does
		 * count as a valid regular mapping for the page
		 * (and is accounted as such in the page maps count).
		 *
		 * So handle this special case as if it was a normal
		 * page mapping, i.e. lock the CPU page table and
		 * return true.
		 *
		 * For more details on device private memory see HMM
		 * (include/linux/hmm.h or mm/hmm.c).
		 */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;
	} else if (!pte_present(ptent)) {
		return false;
	}
	pvmw->ptl = *ptlp;
	spin_lock(pvmw->ptl);
	return true;
}

/**
 * check_pte - check if [@pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is
 * mapped at the @pvmw->pte
 * @pvmw: page_vma_mapped_walk struct, includes both the pte to check
 * and the pfn range to check it against
 *
 * page_vma_mapped_walk() found a place where the pfn range is
 * *potentially* mapped. check_pte() has to validate this.
 *
 * @pvmw->pte may point to an empty PTE, a swap PTE or a PTE pointing to
 * an arbitrary page.
 *
 * If the PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains
 * a migration entry that points to [@pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages).
 *
 * If the PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to
 * [@pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages).
 *
 * Otherwise, returns false.
 */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
	unsigned long pfn;
	pte_t ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		swp_entry_t entry;
		if (!is_swap_pte(ptent))
			return false;
		entry = pte_to_swp_entry(ptent);

		if (!is_migration_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;

		/* Handle un-addressable ZONE_DEVICE memory */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else {
		if (!pte_present(ptent))
			return false;

		pfn = pte_pfn(ptent);
	}

	/*
	 * Unsigned wraparound makes this a single-compare range check:
	 * if pfn is below pvmw->pfn, the subtraction wraps to a huge
	 * value and the comparison fails.
	 */
	return (pfn - pvmw->pfn) < pvmw->nr_pages;
}

/* Returns true if the two ranges overlap. Careful to not overflow. */
static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
{
	if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
		return false;
	if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
		return false;
	return true;
}
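/*
 * Overflow note for check_pmd(): both comparisons use the *last* pfn of
 * a range (x + n - 1) rather than the one-past-the-end pfn (x + n), so
 * neither sum can wrap even for a range ending at the top of the pfn
 * space. The PMD covering pfns [pfn, pfn + HPAGE_PMD_NR) overlaps
 * [pvmw->pfn, pvmw->pfn + pvmw->nr_pages) exactly when neither range's
 * last pfn falls below the other range's first pfn, which is what the
 * two early returns reject.
 */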

static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
{
	pvmw->address = (pvmw->address + size) & ~(size - 1);
	if (!pvmw->address)
		pvmw->address = ULONG_MAX;
}
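/*
 * step_forward() rounds the address up to the next boundary of the
 * given (power-of-two) table size. For example, with a 2MB PMD_SIZE
 * (as on x86-64) and pvmw->address = 0x3ff000:
 * (0x3ff000 + 0x200000) & ~0x1fffff = 0x400000, the start of the next
 * PMD. If the addition wraps to 0 at the top of the address space, the
 * address is pinned to ULONG_MAX so the walk's "address < end" loop
 * condition still terminates.
 */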

/**
 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
 * must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte
 * point to relevant page table entries. @pvmw->ptl is locked. @pvmw->address
 * is adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found a PMD-mapped
 * page (usually a THP). For a PTE-mapped THP, you should run
 * page_vma_mapped_walk() in a loop to find all PTEs that map the THP;
 * see the usage sketch after the function body.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returned false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long end;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);

	if (unlikely(is_vm_hugetlb_page(vma))) {
		struct hstate *hstate = hstate_vma(vma);
		unsigned long size = huge_page_size(hstate);
		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);
		/*
		 * All callers that get here will already hold the
		 * i_mmap_rwsem. Therefore, no additional locks need to be
		 * taken before calling hugetlb_walk().
		 */
		pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
		if (!check_pte(pvmw))
			return not_found(pvmw);
		return true;
	}

	end = vma_address_end(pvmw);
	if (pvmw->pte)
		goto next_pte;
restart:
	do {
		pgd = pgd_offset(mm, pvmw->address);
		if (!pgd_present(*pgd)) {
			step_forward(pvmw, PGDIR_SIZE);
			continue;
		}
		p4d = p4d_offset(pgd, pvmw->address);
		if (!p4d_present(*p4d)) {
			step_forward(pvmw, P4D_SIZE);
			continue;
		}
		pud = pud_offset(p4d, pvmw->address);
		if (!pud_present(*pud)) {
			step_forward(pvmw, PUD_SIZE);
			continue;
		}

		pvmw->pmd = pmd_offset(pud, pvmw->address);
		/*
		 * Make sure the pmd value isn't cached in a register by the
		 * compiler and used as a stale value after we've observed a
		 * subsequent update.
		 */
		pmde = pmdp_get_lockless(pvmw->pmd);

		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) ||
		    (pmd_present(pmde) && pmd_devmap(pmde))) {
			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
			pmde = *pvmw->pmd;
			if (!pmd_present(pmde)) {
				swp_entry_t entry;

				if (!thp_migration_supported() ||
				    !(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				entry = pmd_to_swp_entry(pmde);
				if (!is_migration_entry(entry) ||
				    !check_pmd(swp_offset_pfn(entry), pvmw))
					return not_found(pvmw);
				return true;
			}
			if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) {
				if (pvmw->flags & PVMW_MIGRATION)
					return not_found(pvmw);
				if (!check_pmd(pmd_pfn(pmde), pvmw))
					return not_found(pvmw);
				return true;
			}
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		} else if (!pmd_present(pmde)) {
			/*
			 * If PVMW_SYNC, take and drop THP pmd lock so that we
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			if ((pvmw->flags & PVMW_SYNC) &&
			    thp_vma_suitable_order(vma, pvmw->address,
						   PMD_ORDER) &&
			    (pvmw->nr_pages >= HPAGE_PMD_NR)) {
				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

				spin_unlock(ptl);
			}
			step_forward(pvmw, PMD_SIZE);
			continue;
		}
		if (!map_pte(pvmw, &ptl)) {
			if (!pvmw->pte)
				goto restart;
			goto next_pte;
		}
this_pte:
		if (check_pte(pvmw))
			return true;
next_pte:
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				pte_unmap(pvmw->pte);
				pvmw->pte = NULL;
				goto restart;
			}
			pvmw->pte++;
		} while (pte_none(ptep_get(pvmw->pte)));

		if (!pvmw->ptl) {
			pvmw->ptl = ptl;
			spin_lock(pvmw->ptl);
		}
		goto this_pte;
	} while (pvmw->address < end);

	return false;
}
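/*
 * A minimal usage sketch, modelled on the rmap walkers in mm/rmap.c
 * (the loop body is illustrative only, not a real caller):
 *
 *	struct page_vma_mapped_walk pvmw = {
 *		.pfn = folio_pfn(folio),
 *		.nr_pages = folio_nr_pages(folio),
 *		.vma = vma,
 *		.address = address,
 *	};
 *
 *	while (page_vma_mapped_walk(&pvmw)) {
 *		if (pvmw.pte) {
 *			// one pte mapping the folio: inspect *pvmw.pte
 *		} else {
 *			// PMD-mapped THP: inspect *pvmw.pmd
 *		}
 *	}
 *
 * Each true return leaves pvmw.ptl held for the reported entry; the
 * walk itself manages the lock as it advances, and after a false
 * return (or page_vma_mapped_walk_done()) nothing remains locked.
 */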

/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA. Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	struct page_vma_mapped_walk pvmw = {
		.pfn = page_to_pfn(page),
		.nr_pages = 1,
		.vma = vma,
		.flags = PVMW_SYNC,
	};

	pvmw.address = vma_address(page, vma);
	if (pvmw.address == -EFAULT)
		return 0;
	if (!page_vma_mapped_walk(&pvmw))
		return 0;
	page_vma_mapped_walk_done(&pvmw);
	return 1;
}
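/*
 * Illustrative call site (a sketch, not a real caller): code that holds
 * a reference to the page and the relevant rmap locks might simply do:
 *
 *	if (page_mapped_in_vma(page, vma))
 *		// page is mapped by this vma at vma_address(page, vma)
 *
 * Note the PVMW_SYNC flag above: the walk uses the stricter, locked
 * lookup, so the result reflects the page tables as seen under the pte
 * lock rather than a racy unlocked snapshot.
 */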