// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}

static bool map_pte(struct page_vma_mapped_walk *pvmw)
{
	pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
	if (!(pvmw->flags & PVMW_SYNC)) {
		if (pvmw->flags & PVMW_MIGRATION) {
			if (!is_swap_pte(*pvmw->pte))
				return false;
		} else {
			/*
			 * We get here when we are trying to unmap a private
			 * device page from the process address space. Such
			 * a page is not CPU accessible and thus is mapped as
			 * a special swap entry; nonetheless it still counts
			 * as a valid regular mapping for the page (and is
			 * accounted as such in the page's map count).
			 *
			 * So handle this special case as if it were a normal
			 * page mapping, i.e. lock the CPU page table and
			 * return true.
			 *
			 * For more details on device private memory see HMM
			 * (include/linux/hmm.h or mm/hmm.c).
			 */
			if (is_swap_pte(*pvmw->pte)) {
				swp_entry_t entry;

				/* Handle un-addressable ZONE_DEVICE memory */
				entry = pte_to_swp_entry(*pvmw->pte);
				if (!is_device_private_entry(entry))
					return false;
			} else if (!pte_present(*pvmw->pte))
				return false;
		}
	}
	pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
	spin_lock(pvmw->ptl);
	return true;
}

static inline bool pfn_is_match(struct page *page, unsigned long pfn)
{
	unsigned long page_pfn = page_to_pfn(page);

	/* normal page and hugetlbfs page */
	if (!PageTransCompound(page) || PageHuge(page))
		return page_pfn == pfn;

	/* THP can be referenced by any subpage */
	return pfn >= page_pfn && pfn - page_pfn < thp_nr_pages(page);
}

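/*
 * Illustrative example (not part of the original file): for a 2MB THP
 * on x86-64, thp_nr_pages() is 512, so with the head page at pfn P,
 * pfn_is_match() accepts any pfn in [P, P + 512):
 *
 *	pfn_is_match(head, page_to_pfn(head))		-> true  (head page)
 *	pfn_is_match(head, page_to_pfn(head) + 511)	-> true  (last tail)
 *	pfn_is_match(head, page_to_pfn(head) + 512)	-> false (past the THP)
 */
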
/**
 * check_pte - check if @pvmw->page is mapped at the @pvmw->pte
 * @pvmw: page_vma_mapped_walk struct, includes the pte and page to check
 *
 * page_vma_mapped_walk() found a place where @pvmw->page is *potentially*
 * mapped. check_pte() has to validate this.
 *
 * pvmw->pte may point to an empty PTE, a swap PTE or a PTE pointing to
 * an arbitrary page.
 *
 * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains a
 * migration entry that points to @pvmw->page or any subpage in case of THP.
 *
 * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points to
 * pvmw->page or any subpage in case of THP.
 *
 * Otherwise, returns false.
 */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
	unsigned long pfn;

	if (pvmw->flags & PVMW_MIGRATION) {
		swp_entry_t entry;

		if (!is_swap_pte(*pvmw->pte))
			return false;
		entry = pte_to_swp_entry(*pvmw->pte);

		if (!is_migration_entry(entry))
			return false;

		pfn = migration_entry_to_pfn(entry);
	} else if (is_swap_pte(*pvmw->pte)) {
		swp_entry_t entry;

		/* Handle un-addressable ZONE_DEVICE memory */
		entry = pte_to_swp_entry(*pvmw->pte);
		if (!is_device_private_entry(entry))
			return false;

		pfn = device_private_entry_to_pfn(entry);
	} else {
		if (!pte_present(*pvmw->pte))
			return false;

		pfn = pte_pfn(*pvmw->pte);
	}

	return pfn_is_match(pvmw->page, pfn);
}

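/*
 * Illustrative summary (not part of the original file) of what
 * check_pte() accepts, by walk type:
 *
 *	PVMW_MIGRATION set:   only a migration swap entry whose pfn falls
 *			      inside @pvmw->page;
 *	PVMW_MIGRATION clear: a present PTE, or a device-private swap entry,
 *			      whose pfn falls inside @pvmw->page.
 *
 * A genuine swap PTE never matches: it carries no pfn to compare.
 */
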
/**
 * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
 * must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte
 * point to relevant page table entries. @pvmw->ptl is locked. @pvmw->address
 * is adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found a PMD-mapped
 * page (usually THP). For PTE-mapped THP, you should run
 * page_vma_mapped_walk() in a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returned false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct mm_struct *mm = pvmw->vma->vm_mm;
	struct page *page = pvmw->page;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);

	if (unlikely(PageHuge(page))) {
		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);

		/* when pud is not present, pte will be NULL */
		pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
		spin_lock(pvmw->ptl);
		if (!check_pte(pvmw))
			return not_found(pvmw);
		return true;
	}

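	/* pte was mapped on a previous call: resume the walk from there */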
	if (pvmw->pte)
		goto next_pte;
restart:
	pgd = pgd_offset(mm, pvmw->address);
	if (!pgd_present(*pgd))
		return false;
	p4d = p4d_offset(pgd, pvmw->address);
	if (!p4d_present(*p4d))
		return false;
	pud = pud_offset(p4d, pvmw->address);
	if (!pud_present(*pud))
		return false;
	pvmw->pmd = pmd_offset(pud, pvmw->address);
	/*
	 * Make sure the pmd value isn't cached in a register by the
	 * compiler and used as a stale value after we've observed a
	 * subsequent update.
	 */
	pmde = READ_ONCE(*pvmw->pmd);
	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
		pmde = *pvmw->pmd;
		if (likely(pmd_trans_huge(pmde))) {
			if (pvmw->flags & PVMW_MIGRATION)
				return not_found(pvmw);
			if (pmd_page(pmde) != page)
				return not_found(pvmw);
			return true;
		} else if (!pmd_present(pmde)) {
			if (thp_migration_supported()) {
				if (!(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				if (is_migration_entry(pmd_to_swp_entry(pmde))) {
					swp_entry_t entry = pmd_to_swp_entry(pmde);

					if (migration_entry_to_page(entry) != page)
						return not_found(pvmw);
					return true;
				}
			}
			return not_found(pvmw);
		} else {
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		}
	} else if (!pmd_present(pmde)) {
		/*
		 * If PVMW_SYNC, take and drop THP pmd lock so that we
		 * cannot return prematurely, while zap_huge_pmd() has
		 * cleared *pmd but not decremented compound_mapcount().
		 */
		if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
			spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

			spin_unlock(ptl);
		}
		return false;
	}
	if (!map_pte(pvmw))
		goto next_pte;
	while (1) {
		unsigned long end;

		if (check_pte(pvmw))
			return true;
next_pte:
		/* Seek to next pte only makes sense for THP */
		if (!PageTransHuge(page))
			return not_found(pvmw);
		end = vma_address_end(page, pvmw->vma);
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if (pvmw->address % PMD_SIZE == 0) {
				pte_unmap(pvmw->pte);
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				goto restart;
			} else {
				pvmw->pte++;
			}
		} while (pte_none(*pvmw->pte));

		if (!pvmw->ptl) {
			pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
			spin_lock(pvmw->ptl);
		}
	}
}

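/*
 * Illustrative usage sketch (not part of the original file): the loop a
 * caller runs to visit every page table entry mapping @page in @vma.
 * The function name is hypothetical; the pattern follows the rmap
 * walkers in mm/rmap.c.
 */
static unsigned int __maybe_unused count_mappings_in_vma(struct page *page,
						struct vm_area_struct *vma)
{
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.address = vma_address(page, vma),
	};
	unsigned int mapcount = 0;

	if (pvmw.address == -EFAULT)
		return 0;

	while (page_vma_mapped_walk(&pvmw)) {
		/*
		 * Here pvmw.pte (or pvmw.pmd for a PMD-mapped THP) is
		 * valid and pvmw.ptl is held; a real caller would
		 * inspect or modify the entry, and could bail out early
		 * with page_vma_mapped_walk_done(&pvmw).
		 */
		mapcount++;
	}
	/* The final, failed iteration has already dropped the lock. */
	return mapcount;
}
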
/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA. Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.flags = PVMW_SYNC,
	};

	pvmw.address = vma_address(page, vma);
	if (pvmw.address == -EFAULT)
		return 0;
	if (!page_vma_mapped_walk(&pvmw))
		return 0;
	page_vma_mapped_walk_done(&pvmw);
	return 1;
}

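/*
 * Illustrative example (not part of the original file): memory
 * poisoning is the classic caller, roughly as in mm/memory-failure.c
 * when collecting the processes that map a hwpoisoned page:
 *
 *	if (page_mapped_in_vma(page, vma) && vma->vm_mm == t->mm)
 *		add_to_kill(t, page, vma, to_kill);
 */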