// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}
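
/*
 * map_pte() maps, and except when it declines the entry also locks, the
 * pte for pvmw->address.  On success it returns with pvmw->ptl held;
 * when it returns false the pte is left mapped but unlocked, so that
 * the next_pte loop in page_vma_mapped_walk() can keep scanning the
 * rest of this page table from it.
 */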
static bool map_pte(struct page_vma_mapped_walk *pvmw)
{
	if (pvmw->flags & PVMW_SYNC) {
		/* Use the stricter lookup */
		pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd,
						pvmw->address, &pvmw->ptl);
		return true;
	}

	pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
	if (pvmw->flags & PVMW_MIGRATION) {
		if (!is_swap_pte(*pvmw->pte))
			return false;
	} else if (is_swap_pte(*pvmw->pte)) {
		swp_entry_t entry;
		/*
		 * Handle un-addressable ZONE_DEVICE memory.
		 *
		 * We get here when we are trying to unmap a private
		 * device page from the process address space. Such a
		 * page is not CPU-accessible and thus is mapped as a
		 * special swap entry, but it still counts as a valid
		 * regular mapping for the page (and is accounted for
		 * as such in the page's map count).
		 *
		 * So handle this special case as if it were a normal
		 * page mapping, i.e. lock the CPU page table and
		 * return true.
		 *
		 * For more details on device private memory see HMM
		 * (include/linux/hmm.h or mm/hmm.c).
		 */
		entry = pte_to_swp_entry(*pvmw->pte);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;
	} else if (!pte_present(*pvmw->pte)) {
		return false;
	}
	pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
	spin_lock(pvmw->ptl);
	return true;
}

/**
 * check_pte - check if the pfn range [@pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
 * is mapped at @pvmw->pte
 * @pvmw: page_vma_mapped_walk struct, includes the pte and the pfn range
 * to check
 *
 * page_vma_mapped_walk() found a place where the pfn range is *potentially*
 * mapped. check_pte() has to validate this.
 *
 * pvmw->pte may point to an empty PTE, a swap PTE, or a PTE pointing to
 * an arbitrary page.
 *
 * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains a
 * migration entry that points into the pfn range (any subpage, in case
 * of a THP).
 *
 * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points
 * into the pfn range.
 *
 * Otherwise, returns false.
 */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
	unsigned long pfn;

	if (pvmw->flags & PVMW_MIGRATION) {
		swp_entry_t entry;
		if (!is_swap_pte(*pvmw->pte))
			return false;
		entry = pte_to_swp_entry(*pvmw->pte);

		if (!is_migration_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else if (is_swap_pte(*pvmw->pte)) {
		swp_entry_t entry;

		/* Handle un-addressable ZONE_DEVICE memory */
		entry = pte_to_swp_entry(*pvmw->pte);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else {
		if (!pte_present(*pvmw->pte))
			return false;

		pfn = pte_pfn(*pvmw->pte);
	}

	return (pfn - pvmw->pfn) < pvmw->nr_pages;
}
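
/*
 * Note on check_pte()'s final test above: because pfn and pvmw->pfn are
 * unsigned, a pfn below pvmw->pfn wraps around to a huge value, so the
 * single comparison covers both ends of the range.  For example, with
 * pvmw->pfn == 100 and nr_pages == 512, pfn 99 yields (unsigned)-1,
 * which is not below 512 (false), while pfn 611 yields 511 (true).
 */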

/* Returns true if the two ranges overlap.  Careful not to overflow. */
static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
{
	if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
		return false;
	if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
		return false;
	return true;
}

static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
{
	pvmw->address = (pvmw->address + size) & ~(size - 1);
	if (!pvmw->address)
		pvmw->address = ULONG_MAX;
}

/**
 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. pfn, nr_pages, vma, address
 * and flags must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte
 * point to relevant page table entries. @pvmw->ptl is locked. @pvmw->address
 * is adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found a PMD-mapped page
 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
 * a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returns false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long end;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);

	if (unlikely(is_vm_hugetlb_page(vma))) {
		struct hstate *hstate = hstate_vma(vma);
		unsigned long size = huge_page_size(hstate);
		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);
		/*
		 * All callers that get here will already hold the
		 * i_mmap_rwsem.  Therefore, no additional locks need to be
		 * taken before calling hugetlb_walk().
		 */
		pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
		if (!check_pte(pvmw))
			return not_found(pvmw);
		return true;
	}
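
	/*
	 * Not a hugetlb VMA: walk the regular page tables.  The loop
	 * below descends pgd -> p4d -> pud -> pmd for each address and,
	 * whenever a level is not present, skips ahead to the next
	 * entry of that level with step_forward(); it drops to pte
	 * level only for a present pmd that points to a page table.
	 */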
	end = vma_address_end(pvmw);
	if (pvmw->pte)
		goto next_pte;
restart:
	do {
		pgd = pgd_offset(mm, pvmw->address);
		if (!pgd_present(*pgd)) {
			step_forward(pvmw, PGDIR_SIZE);
			continue;
		}
		p4d = p4d_offset(pgd, pvmw->address);
		if (!p4d_present(*p4d)) {
			step_forward(pvmw, P4D_SIZE);
			continue;
		}
		pud = pud_offset(p4d, pvmw->address);
		if (!pud_present(*pud)) {
			step_forward(pvmw, PUD_SIZE);
			continue;
		}

		pvmw->pmd = pmd_offset(pud, pvmw->address);
		/*
		 * Make sure the pmd value isn't cached in a register by the
		 * compiler and used as a stale value after we've observed a
		 * subsequent update.
		 */
		pmde = pmdp_get_lockless(pvmw->pmd);

		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) ||
		    (pmd_present(pmde) && pmd_devmap(pmde))) {
			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
			pmde = *pvmw->pmd;
			if (!pmd_present(pmde)) {
				swp_entry_t entry;

				if (!thp_migration_supported() ||
				    !(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				entry = pmd_to_swp_entry(pmde);
				if (!is_migration_entry(entry) ||
				    !check_pmd(swp_offset_pfn(entry), pvmw))
					return not_found(pvmw);
				return true;
			}
			if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) {
				if (pvmw->flags & PVMW_MIGRATION)
					return not_found(pvmw);
				if (!check_pmd(pmd_pfn(pmde), pvmw))
					return not_found(pvmw);
				return true;
			}
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		} else if (!pmd_present(pmde)) {
			/*
			 * If PVMW_SYNC, take and drop THP pmd lock so that we
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			if ((pvmw->flags & PVMW_SYNC) &&
			    transhuge_vma_suitable(vma, pvmw->address) &&
			    (pvmw->nr_pages >= HPAGE_PMD_NR)) {
				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

				spin_unlock(ptl);
			}
			step_forward(pvmw, PMD_SIZE);
			continue;
		}
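		/*
		 * A present pmd pointing to a page table: handle the
		 * walk at pte level.  map_pte() takes the pte lock on
		 * success; if it declines the first pte, the next_pte
		 * loop below scans the rest of this page table and
		 * takes the lock only once a plausible pte is found.
		 */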
		if (!map_pte(pvmw))
			goto next_pte;
this_pte:
		if (check_pte(pvmw))
			return true;
next_pte:
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				pte_unmap(pvmw->pte);
				pvmw->pte = NULL;
				goto restart;
			}
			pvmw->pte++;
		} while (pte_none(*pvmw->pte));

		if (!pvmw->ptl) {
			pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
			spin_lock(pvmw->ptl);
		}
		goto this_pte;
	} while (pvmw->address < end);

	return false;
}

/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA.  Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	struct page_vma_mapped_walk pvmw = {
		.pfn = page_to_pfn(page),
		.nr_pages = 1,
		.vma = vma,
		.flags = PVMW_SYNC,
	};

	pvmw.address = vma_address(page, vma);
	if (pvmw.address == -EFAULT)
		return 0;
	if (!page_vma_mapped_walk(&pvmw))
		return 0;
	page_vma_mapped_walk_done(&pvmw);
	return 1;
}
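
/*
 * Usage sketch (illustrative, not part of the original file): the
 * canonical caller pattern, modelled on the rmap walkers in mm/rmap.c
 * such as folio_referenced_one().  The helper name and its "count the
 * mapped entries" payload are hypothetical.  A real caller does its
 * work on each pte/pmd found inside the loop; a caller that breaks out
 * of the loop early must call page_vma_mapped_walk_done() itself.
 */
#if 0
static unsigned int count_mapped_entries(struct folio *folio,
					 struct vm_area_struct *vma,
					 unsigned long address)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
	unsigned int mapped = 0;

	/*
	 * Each successful iteration returns with pvmw.ptl held and
	 * pvmw.pte (or pvmw.pmd alone, for a PMD-mapped THP) pointing
	 * at a relevant entry; the final, failing call drops the lock
	 * and unmaps the pte.
	 */
	while (page_vma_mapped_walk(&pvmw))
		mapped++;

	return mapped;
}
#endif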