1e6473092SMatt Mackall #include <linux/mm.h> 2e6473092SMatt Mackall #include <linux/highmem.h> 3e6473092SMatt Mackall #include <linux/sched.h> 4d33b9f45SNaoya Horiguchi #include <linux/hugetlb.h> 5e6473092SMatt Mackall 6e6473092SMatt Mackall static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 72165009bSDave Hansen struct mm_walk *walk) 8e6473092SMatt Mackall { 9e6473092SMatt Mackall pte_t *pte; 10e6473092SMatt Mackall int err = 0; 11e6473092SMatt Mackall 12e6473092SMatt Mackall pte = pte_offset_map(pmd, addr); 13556637cdSJohannes Weiner for (;;) { 142165009bSDave Hansen err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk); 15e6473092SMatt Mackall if (err) 16e6473092SMatt Mackall break; 17556637cdSJohannes Weiner addr += PAGE_SIZE; 18556637cdSJohannes Weiner if (addr == end) 19556637cdSJohannes Weiner break; 20556637cdSJohannes Weiner pte++; 21556637cdSJohannes Weiner } 22e6473092SMatt Mackall 23e6473092SMatt Mackall pte_unmap(pte); 24e6473092SMatt Mackall return err; 25e6473092SMatt Mackall } 26e6473092SMatt Mackall 27e6473092SMatt Mackall static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, 282165009bSDave Hansen struct mm_walk *walk) 29e6473092SMatt Mackall { 30e6473092SMatt Mackall pmd_t *pmd; 31e6473092SMatt Mackall unsigned long next; 32e6473092SMatt Mackall int err = 0; 33e6473092SMatt Mackall 34e6473092SMatt Mackall pmd = pmd_offset(pud, addr); 35e6473092SMatt Mackall do { 3603319327SDave Hansen again: 37e6473092SMatt Mackall next = pmd_addr_end(addr, end); 3803319327SDave Hansen if (pmd_none(*pmd)) { 39e6473092SMatt Mackall if (walk->pte_hole) 402165009bSDave Hansen err = walk->pte_hole(addr, next, walk); 41e6473092SMatt Mackall if (err) 42e6473092SMatt Mackall break; 43e6473092SMatt Mackall continue; 44e6473092SMatt Mackall } 4503319327SDave Hansen /* 4603319327SDave Hansen * This implies that each ->pmd_entry() handler 4703319327SDave Hansen * needs to know about pmd_trans_huge() pmds 4803319327SDave Hansen */ 49e6473092SMatt Mackall if (walk->pmd_entry) 502165009bSDave Hansen err = walk->pmd_entry(pmd, addr, next, walk); 5103319327SDave Hansen if (err) 5203319327SDave Hansen break; 5303319327SDave Hansen 5403319327SDave Hansen /* 5503319327SDave Hansen * Check this here so we only break down trans_huge 5603319327SDave Hansen * pages when we _need_ to 5703319327SDave Hansen */ 5803319327SDave Hansen if (!walk->pte_entry) 5903319327SDave Hansen continue; 6003319327SDave Hansen 6103319327SDave Hansen split_huge_page_pmd(walk->mm, pmd); 6203319327SDave Hansen if (pmd_none_or_clear_bad(pmd)) 6303319327SDave Hansen goto again; 642165009bSDave Hansen err = walk_pte_range(pmd, addr, next, walk); 65e6473092SMatt Mackall if (err) 66e6473092SMatt Mackall break; 67e6473092SMatt Mackall } while (pmd++, addr = next, addr != end); 68e6473092SMatt Mackall 69e6473092SMatt Mackall return err; 70e6473092SMatt Mackall } 71e6473092SMatt Mackall 72e6473092SMatt Mackall static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, 732165009bSDave Hansen struct mm_walk *walk) 74e6473092SMatt Mackall { 75e6473092SMatt Mackall pud_t *pud; 76e6473092SMatt Mackall unsigned long next; 77e6473092SMatt Mackall int err = 0; 78e6473092SMatt Mackall 79e6473092SMatt Mackall pud = pud_offset(pgd, addr); 80e6473092SMatt Mackall do { 81e6473092SMatt Mackall next = pud_addr_end(addr, end); 82e6473092SMatt Mackall if (pud_none_or_clear_bad(pud)) { 83e6473092SMatt Mackall if (walk->pte_hole) 842165009bSDave Hansen err = walk->pte_hole(addr, next, walk); 85e6473092SMatt Mackall if (err) 86e6473092SMatt Mackall break; 87e6473092SMatt Mackall continue; 88e6473092SMatt Mackall } 89e6473092SMatt Mackall if (walk->pud_entry) 902165009bSDave Hansen err = walk->pud_entry(pud, addr, next, walk); 91e6473092SMatt Mackall if (!err && (walk->pmd_entry || walk->pte_entry)) 922165009bSDave Hansen err = walk_pmd_range(pud, addr, next, walk); 93e6473092SMatt Mackall if (err) 94e6473092SMatt Mackall break; 95e6473092SMatt Mackall } while (pud++, addr = next, addr != end); 96e6473092SMatt Mackall 97e6473092SMatt Mackall return err; 98e6473092SMatt Mackall } 99e6473092SMatt Mackall 100116354d1SNaoya Horiguchi #ifdef CONFIG_HUGETLB_PAGE 101116354d1SNaoya Horiguchi static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr, 102116354d1SNaoya Horiguchi unsigned long end) 103116354d1SNaoya Horiguchi { 104116354d1SNaoya Horiguchi unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h); 105116354d1SNaoya Horiguchi return boundary < end ? boundary : end; 106116354d1SNaoya Horiguchi } 107116354d1SNaoya Horiguchi 108116354d1SNaoya Horiguchi static int walk_hugetlb_range(struct vm_area_struct *vma, 109116354d1SNaoya Horiguchi unsigned long addr, unsigned long end, 110116354d1SNaoya Horiguchi struct mm_walk *walk) 111116354d1SNaoya Horiguchi { 112116354d1SNaoya Horiguchi struct hstate *h = hstate_vma(vma); 113116354d1SNaoya Horiguchi unsigned long next; 114116354d1SNaoya Horiguchi unsigned long hmask = huge_page_mask(h); 115116354d1SNaoya Horiguchi pte_t *pte; 116116354d1SNaoya Horiguchi int err = 0; 117116354d1SNaoya Horiguchi 118116354d1SNaoya Horiguchi do { 119116354d1SNaoya Horiguchi next = hugetlb_entry_end(h, addr, end); 120116354d1SNaoya Horiguchi pte = huge_pte_offset(walk->mm, addr & hmask); 121116354d1SNaoya Horiguchi if (pte && walk->hugetlb_entry) 122116354d1SNaoya Horiguchi err = walk->hugetlb_entry(pte, hmask, addr, next, walk); 123116354d1SNaoya Horiguchi if (err) 124116354d1SNaoya Horiguchi return err; 125116354d1SNaoya Horiguchi } while (addr = next, addr != end); 126116354d1SNaoya Horiguchi 127116354d1SNaoya Horiguchi return 0; 128116354d1SNaoya Horiguchi } 129*6c6d5280SKOSAKI Motohiro 130*6c6d5280SKOSAKI Motohiro static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk) 131*6c6d5280SKOSAKI Motohiro { 132*6c6d5280SKOSAKI Motohiro struct vm_area_struct *vma; 133*6c6d5280SKOSAKI Motohiro 134*6c6d5280SKOSAKI Motohiro /* We don't need vma lookup at all. */ 135*6c6d5280SKOSAKI Motohiro if (!walk->hugetlb_entry) 136*6c6d5280SKOSAKI Motohiro return NULL; 137*6c6d5280SKOSAKI Motohiro 138*6c6d5280SKOSAKI Motohiro VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); 139*6c6d5280SKOSAKI Motohiro vma = find_vma(walk->mm, addr); 140*6c6d5280SKOSAKI Motohiro if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma)) 141*6c6d5280SKOSAKI Motohiro return vma; 142*6c6d5280SKOSAKI Motohiro 143*6c6d5280SKOSAKI Motohiro return NULL; 144*6c6d5280SKOSAKI Motohiro } 145*6c6d5280SKOSAKI Motohiro 146*6c6d5280SKOSAKI Motohiro #else /* CONFIG_HUGETLB_PAGE */ 147*6c6d5280SKOSAKI Motohiro static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk) 148*6c6d5280SKOSAKI Motohiro { 149*6c6d5280SKOSAKI Motohiro return NULL; 150*6c6d5280SKOSAKI Motohiro } 151*6c6d5280SKOSAKI Motohiro 152*6c6d5280SKOSAKI Motohiro static int walk_hugetlb_range(struct vm_area_struct *vma, 153*6c6d5280SKOSAKI Motohiro unsigned long addr, unsigned long end, 154*6c6d5280SKOSAKI Motohiro struct mm_walk *walk) 155*6c6d5280SKOSAKI Motohiro { 156*6c6d5280SKOSAKI Motohiro return 0; 157*6c6d5280SKOSAKI Motohiro } 158*6c6d5280SKOSAKI Motohiro 159*6c6d5280SKOSAKI Motohiro #endif /* CONFIG_HUGETLB_PAGE */ 160*6c6d5280SKOSAKI Motohiro 161*6c6d5280SKOSAKI Motohiro 162116354d1SNaoya Horiguchi 163e6473092SMatt Mackall /** 164e6473092SMatt Mackall * walk_page_range - walk a memory map's page tables with a callback 1657682486bSRandy Dunlap * @mm: memory map to walk 1667682486bSRandy Dunlap * @addr: starting address 1677682486bSRandy Dunlap * @end: ending address 1687682486bSRandy Dunlap * @walk: set of callbacks to invoke for each level of the tree 169e6473092SMatt Mackall * 170e6473092SMatt Mackall * Recursively walk the page table for the memory area in a VMA, 171e6473092SMatt Mackall * calling supplied callbacks. Callbacks are called in-order (first 172e6473092SMatt Mackall * PGD, first PUD, first PMD, first PTE, second PTE... second PMD, 173e6473092SMatt Mackall * etc.). If lower-level callbacks are omitted, walking depth is reduced. 174e6473092SMatt Mackall * 1752165009bSDave Hansen * Each callback receives an entry pointer and the start and end of the 1762165009bSDave Hansen * associated range, and a copy of the original mm_walk for access to 1772165009bSDave Hansen * the ->private or ->mm fields. 178e6473092SMatt Mackall * 179e6473092SMatt Mackall * No locks are taken, but the bottom level iterator will map PTE 180e6473092SMatt Mackall * directories from highmem if necessary. 181e6473092SMatt Mackall * 182e6473092SMatt Mackall * If any callback returns a non-zero value, the walk is aborted and 183e6473092SMatt Mackall * the return value is propagated back to the caller. Otherwise 0 is returned. 184e6473092SMatt Mackall */ 1852165009bSDave Hansen int walk_page_range(unsigned long addr, unsigned long end, 1862165009bSDave Hansen struct mm_walk *walk) 187e6473092SMatt Mackall { 188e6473092SMatt Mackall pgd_t *pgd; 189e6473092SMatt Mackall unsigned long next; 190e6473092SMatt Mackall int err = 0; 191e6473092SMatt Mackall 192e6473092SMatt Mackall if (addr >= end) 193e6473092SMatt Mackall return err; 194e6473092SMatt Mackall 1952165009bSDave Hansen if (!walk->mm) 1962165009bSDave Hansen return -EINVAL; 1972165009bSDave Hansen 1982165009bSDave Hansen pgd = pgd_offset(walk->mm, addr); 199e6473092SMatt Mackall do { 200*6c6d5280SKOSAKI Motohiro struct vm_area_struct *vma; 2015f0af70aSDavid Sterba 202e6473092SMatt Mackall next = pgd_addr_end(addr, end); 203d33b9f45SNaoya Horiguchi 2045dc37642SNaoya Horiguchi /* 2055dc37642SNaoya Horiguchi * handle hugetlb vma individually because pagetable walk for 2065dc37642SNaoya Horiguchi * the hugetlb page is dependent on the architecture and 2075dc37642SNaoya Horiguchi * we can't handled it in the same manner as non-huge pages. 2085dc37642SNaoya Horiguchi */ 209*6c6d5280SKOSAKI Motohiro vma = hugetlb_vma(addr, walk); 210*6c6d5280SKOSAKI Motohiro if (vma) { 211d33b9f45SNaoya Horiguchi if (vma->vm_end < next) 212d33b9f45SNaoya Horiguchi next = vma->vm_end; 213116354d1SNaoya Horiguchi /* 214116354d1SNaoya Horiguchi * Hugepage is very tightly coupled with vma, so 215116354d1SNaoya Horiguchi * walk through hugetlb entries within a given vma. 216116354d1SNaoya Horiguchi */ 217116354d1SNaoya Horiguchi err = walk_hugetlb_range(vma, addr, next, walk); 2185dc37642SNaoya Horiguchi if (err) 2195dc37642SNaoya Horiguchi break; 220116354d1SNaoya Horiguchi pgd = pgd_offset(walk->mm, next); 221d33b9f45SNaoya Horiguchi continue; 222d33b9f45SNaoya Horiguchi } 223*6c6d5280SKOSAKI Motohiro 224e6473092SMatt Mackall if (pgd_none_or_clear_bad(pgd)) { 225e6473092SMatt Mackall if (walk->pte_hole) 2262165009bSDave Hansen err = walk->pte_hole(addr, next, walk); 227e6473092SMatt Mackall if (err) 228e6473092SMatt Mackall break; 229d33b9f45SNaoya Horiguchi pgd++; 230e6473092SMatt Mackall continue; 231e6473092SMatt Mackall } 232e6473092SMatt Mackall if (walk->pgd_entry) 2332165009bSDave Hansen err = walk->pgd_entry(pgd, addr, next, walk); 234e6473092SMatt Mackall if (!err && 235e6473092SMatt Mackall (walk->pud_entry || walk->pmd_entry || walk->pte_entry)) 2362165009bSDave Hansen err = walk_pud_range(pgd, addr, next, walk); 237e6473092SMatt Mackall if (err) 238e6473092SMatt Mackall break; 239d33b9f45SNaoya Horiguchi pgd++; 240d33b9f45SNaoya Horiguchi } while (addr = next, addr != end); 241e6473092SMatt Mackall 242e6473092SMatt Mackall return err; 243e6473092SMatt Mackall } 244