// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
        page_vma_mapped_walk_done(pvmw);
        return false;
}

static bool map_pte(struct page_vma_mapped_walk *pvmw)
{
        pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
        if (!(pvmw->flags & PVMW_SYNC)) {
                if (pvmw->flags & PVMW_MIGRATION) {
                        if (!is_swap_pte(*pvmw->pte))
                                return false;
                } else {
                        /*
                         * We get here when we are trying to unmap a private
                         * device page from the process address space. Such
                         * a page is not CPU accessible and thus is mapped as
                         * a special swap entry; nonetheless it still counts
                         * as a valid regular mapping for the page (and is
                         * accounted as such in the page's map count).
                         *
                         * So handle this special case as if it were a normal
                         * page mapping, i.e. lock the CPU page table and
                         * return true.
                         *
                         * For more details on device private memory see HMM
                         * (include/linux/hmm.h or mm/hmm.c).
                         */
                        if (is_swap_pte(*pvmw->pte)) {
                                swp_entry_t entry;

                                /* Handle un-addressable ZONE_DEVICE memory */
                                entry = pte_to_swp_entry(*pvmw->pte);
                                if (!is_device_private_entry(entry) &&
                                    !is_device_exclusive_entry(entry))
                                        return false;
                        } else if (!pte_present(*pvmw->pte))
                                return false;
                }
        }
        pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
        spin_lock(pvmw->ptl);
        return true;
}
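
/*
 * A minimal sketch of how map_pte() above pairs with check_pte() below
 * (this mirrors the pte-level steps of page_vma_mapped_walk(); it is
 * illustration only, not compiled code):
 *
 *	if (map_pte(pvmw)) {		// pte mapped, pvmw->ptl locked
 *		if (check_pte(pvmw))	// does this pte map pvmw->pfn?
 *			return true;	// report the mapping to the caller
 *	}
 *	// else: step pvmw->address/pvmw->pte forward and try again
 */
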
/**
 * check_pte - check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is
 * mapped at the @pvmw->pte
 * @pvmw: page_vma_mapped_walk struct, includes a pair of pte and pfn range
 * for checking
 *
 * page_vma_mapped_walk() found a place where the pfn range is *potentially*
 * mapped. check_pte() has to validate this.
 *
 * pvmw->pte may point to an empty PTE, a swap PTE or a PTE pointing to
 * an arbitrary page.
 *
 * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains a
 * migration entry that points into [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages).
 *
 * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points into
 * [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages).
 *
 * Otherwise, return false.
 */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
        unsigned long pfn;

        if (pvmw->flags & PVMW_MIGRATION) {
                swp_entry_t entry;

                if (!is_swap_pte(*pvmw->pte))
                        return false;
                entry = pte_to_swp_entry(*pvmw->pte);

                if (!is_migration_entry(entry) &&
                    !is_device_exclusive_entry(entry))
                        return false;

                pfn = swp_offset(entry);
        } else if (is_swap_pte(*pvmw->pte)) {
                swp_entry_t entry;

                /* Handle un-addressable ZONE_DEVICE memory */
                entry = pte_to_swp_entry(*pvmw->pte);
                if (!is_device_private_entry(entry) &&
                    !is_device_exclusive_entry(entry))
                        return false;

                pfn = swp_offset(entry);
        } else {
                if (!pte_present(*pvmw->pte))
                        return false;

                pfn = pte_pfn(*pvmw->pte);
        }

        return (pfn - pvmw->pfn) < pvmw->nr_pages;
}

/* Returns true if the two ranges overlap.  Careful to not overflow. */
static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
{
        if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
                return false;
        if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
                return false;
        return true;
}

static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
{
        pvmw->address = (pvmw->address + size) & ~(size - 1);
        if (!pvmw->address)
                pvmw->address = ULONG_MAX;
}
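
/*
 * A worked example for step_forward(), assuming 4KiB pages with 2MiB
 * PMDs (so for size == PMD_SIZE, size - 1 == 0x1fffff):
 *
 *	pvmw->address == 0x1234000
 *	(0x1234000 + 0x200000) & ~0x1fffff == 0x1400000
 *
 * i.e. the walk jumps straight to the next PMD boundary.  If the
 * addition wraps past the top of the address space, the masked result
 * is 0, which the !pvmw->address check pins to ULONG_MAX so that the
 * "pvmw->address < end" loop condition in page_vma_mapped_walk()
 * terminates the walk.
 */
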
/**
 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. pfn, nr_pages, vma, address
 * and flags must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the pfn range is mapped in the vma. @pvmw->pmd and
 * @pvmw->pte point to the relevant page table entries. @pvmw->ptl is locked.
 * @pvmw->address is adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found a PMD-mapped
 * page (usually a THP). For a PTE-mapped THP, you should run
 * page_vma_mapped_walk() in a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() has returned
 * false, use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
        struct vm_area_struct *vma = pvmw->vma;
        struct mm_struct *mm = vma->vm_mm;
        unsigned long end;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t pmde;

        /* The only possible pmd mapping has been handled on last iteration */
        if (pvmw->pmd && !pvmw->pte)
                return not_found(pvmw);

        if (unlikely(is_vm_hugetlb_page(vma))) {
                unsigned long size = pvmw->nr_pages * PAGE_SIZE;
                /* The only possible mapping was handled on last iteration */
                if (pvmw->pte)
                        return not_found(pvmw);

                /* when pud is not present, pte will be NULL */
                pvmw->pte = huge_pte_offset(mm, pvmw->address, size);
                if (!pvmw->pte)
                        return false;

                pvmw->ptl = huge_pte_lockptr(size_to_hstate(size), mm,
                                             pvmw->pte);
                spin_lock(pvmw->ptl);
                if (!check_pte(pvmw))
                        return not_found(pvmw);
                return true;
        }
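
        /*
         * Non-hugetlb case: scan the range [pvmw->address, end) for page
         * table entries mapping the pvmw->pfn range.  If a previous call
         * returned a pte, resume stepping from it.
         */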
        end = vma_address_end(pvmw);
        if (pvmw->pte)
                goto next_pte;
restart:
        do {
                pgd = pgd_offset(mm, pvmw->address);
                if (!pgd_present(*pgd)) {
                        step_forward(pvmw, PGDIR_SIZE);
                        continue;
                }
                p4d = p4d_offset(pgd, pvmw->address);
                if (!p4d_present(*p4d)) {
                        step_forward(pvmw, P4D_SIZE);
                        continue;
                }
                pud = pud_offset(p4d, pvmw->address);
                if (!pud_present(*pud)) {
                        step_forward(pvmw, PUD_SIZE);
                        continue;
                }

                pvmw->pmd = pmd_offset(pud, pvmw->address);
                /*
                 * Make sure the pmd value isn't cached in a register by the
                 * compiler and used as a stale value after we've observed a
                 * subsequent update.
                 */
                pmde = READ_ONCE(*pvmw->pmd);

                if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
                        pvmw->ptl = pmd_lock(mm, pvmw->pmd);
                        pmde = *pvmw->pmd;
                        if (likely(pmd_trans_huge(pmde))) {
                                if (pvmw->flags & PVMW_MIGRATION)
                                        return not_found(pvmw);
                                if (!check_pmd(pmd_pfn(pmde), pvmw))
                                        return not_found(pvmw);
                                return true;
                        }
                        if (!pmd_present(pmde)) {
                                swp_entry_t entry;

                                if (!thp_migration_supported() ||
                                    !(pvmw->flags & PVMW_MIGRATION))
                                        return not_found(pvmw);
                                entry = pmd_to_swp_entry(pmde);
                                if (!is_migration_entry(entry) ||
                                    !check_pmd(swp_offset(entry), pvmw))
                                        return not_found(pvmw);
                                return true;
                        }
                        /* THP pmd was split under us: handle on pte level */
                        spin_unlock(pvmw->ptl);
                        pvmw->ptl = NULL;
                } else if (!pmd_present(pmde)) {
                        /*
                         * If PVMW_SYNC, take and drop THP pmd lock so that we
                         * cannot return prematurely, while zap_huge_pmd() has
                         * cleared *pmd but not decremented compound_mapcount().
                         */
                        if ((pvmw->flags & PVMW_SYNC) &&
                            transparent_hugepage_active(vma) &&
                            (pvmw->nr_pages >= HPAGE_PMD_NR)) {
                                spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

                                spin_unlock(ptl);
                        }
                        step_forward(pvmw, PMD_SIZE);
                        continue;
                }
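
                /*
                 * By here the pmd normally points to a page table (possibly
                 * one left by a THP split under us): descend to pte level.
                 */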
                if (!map_pte(pvmw))
                        goto next_pte;
this_pte:
                if (check_pte(pvmw))
                        return true;
next_pte:
                do {
                        pvmw->address += PAGE_SIZE;
                        if (pvmw->address >= end)
                                return not_found(pvmw);
                        /* Did we cross page table boundary? */
                        if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
                                if (pvmw->ptl) {
                                        spin_unlock(pvmw->ptl);
                                        pvmw->ptl = NULL;
                                }
                                pte_unmap(pvmw->pte);
                                pvmw->pte = NULL;
                                goto restart;
                        }
                        pvmw->pte++;
                        if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
                                pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
                                spin_lock(pvmw->ptl);
                        }
                } while (pte_none(*pvmw->pte));

                if (!pvmw->ptl) {
                        pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
                        spin_lock(pvmw->ptl);
                }
                goto this_pte;
        } while (pvmw->address < end);

        return false;
}
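
/*
 * A minimal usage sketch for page_vma_mapped_walk() (illustration only:
 * "folio", "vma" and "address" are assumed to be supplied by the caller,
 * which must also hold the appropriate rmap locks, as e.g. rmap_walk()
 * arranges):
 *
 *	struct page_vma_mapped_walk pvmw = {
 *		.pfn = folio_pfn(folio),
 *		.nr_pages = folio_nr_pages(folio),
 *		.vma = vma,
 *		.address = address,
 *	};
 *
 *	while (page_vma_mapped_walk(&pvmw)) {
 *		if (!pvmw.pte)
 *			continue;	// PMD-mapped THP: pvmw.pmd is valid
 *		// pvmw.pte maps one page at pvmw.address, pvmw.ptl is held
 *	}
 */
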
/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA.  Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
        struct page_vma_mapped_walk pvmw = {
                .pfn = page_to_pfn(page),
                .nr_pages = 1,
                .vma = vma,
                .flags = PVMW_SYNC,
        };

        pvmw.address = vma_address(page, vma);
        if (pvmw.address == -EFAULT)
                return 0;
        if (!page_vma_mapped_walk(&pvmw))
                return 0;
        page_vma_mapped_walk_done(&pvmw);
        return 1;
}
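
/*
 * Example caller pattern for page_mapped_in_vma() (a sketch; the
 * memory-failure code uses it in this way to decide whether a VMA
 * really maps a poisoned page before notifying the owning task):
 *
 *	if (page_mapped_in_vma(page, vma))
 *		// the page is mapped here; act on this vma/task
 */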