// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

/*
 * We want to know the real level where an entry is located, ignoring any
 * folding of levels which may be happening. For example, if p4d is folded then
 * a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
 */
static int real_depth(int depth)
{
	if (depth == 3 && PTRS_PER_PMD == 1)
		depth = 2;
	if (depth == 2 && PTRS_PER_PUD == 1)
		depth = 1;
	if (depth == 1 && PTRS_PER_P4D == 1)
		depth = 0;
	return depth;
}

static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	for (;;) {
		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		if (addr >= end - PAGE_SIZE)
			break;
		addr += PAGE_SIZE;
		pte++;
	}
	return err;
}

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;
	spinlock_t *ptl;

	if (walk->no_vma) {
		/*
		 * pte_offset_map() might apply user-specific validation.
		 * Indeed, on x86_64 the pmd entries set up by init_espfix_ap()
		 * fit its pmd_bad() check (_PAGE_NX set and _PAGE_RW clear),
		 * and CONFIG_EFI_PGT_DUMP efi_mm goes so far as to walk them.
		 */
		if (walk->mm == &init_mm || addr >= TASK_SIZE)
			pte = pte_offset_kernel(pmd, addr);
		else
			pte = pte_offset_map(pmd, addr);
		if (pte) {
			err = walk_pte_range_inner(pte, addr, end, walk);
			if (walk->mm != &init_mm && addr < TASK_SIZE)
				pte_unmap(pte);
		}
	} else {
		pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
		if (pte) {
			err = walk_pte_range_inner(pte, addr, end, walk);
			pte_unmap_unlock(pte, ptl);
		}
	}
	if (!pte)
		walk->action = ACTION_AGAIN;
	return err;
}

#ifdef CONFIG_ARCH_HAS_HUGEPD
static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk, int pdshift)
{
	int err = 0;
	const struct mm_walk_ops *ops = walk->ops;
	int shift = hugepd_shift(*phpd);
	int page_size = 1 << shift;

	if (!ops->pte_entry)
		return 0;

	if (addr & (page_size - 1))
		return 0;

	for (;;) {
		pte_t *pte;

		spin_lock(&walk->mm->page_table_lock);
		pte = hugepte_offset(*phpd, addr, pdshift);
		err = ops->pte_entry(pte, addr, addr + page_size, walk);
		spin_unlock(&walk->mm->page_table_lock);

		if (err)
			break;
		if (addr >= end - page_size)
			break;
		addr += page_size;
	}
	return err;
}
#else
static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk, int pdshift)
{
	return 0;
}
#endif

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(3);

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		/*
		 * This implies that each ->pmd_entry() handler
		 * needs to know about pmd_trans_huge() pmds
		 */
		if (ops->pmd_entry)
			err = ops->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		/*
		 * Check this here so we only break down trans_huge
		 * pages when we _need_ to
		 */
		if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pte_entry))
			continue;

		if (walk->vma)
			split_huge_pmd(walk->vma, pmd, addr);

		if (is_hugepd(__hugepd(pmd_val(*pmd))))
			err = walk_hugepd_range((hugepd_t *)pmd, addr, next, walk, PMD_SHIFT);
		else
			err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

	} while (pmd++, addr = next, addr != end);

	return err;
}

static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(2);

	pud = pud_offset(p4d, addr);
	do {
again:
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		if (ops->pud_entry)
			err = ops->pud_entry(pud, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pmd_entry || ops->pte_entry))
			continue;

		if (walk->vma)
			split_huge_pud(walk->vma, pud, addr);
		if (pud_none(*pud))
			goto again;

		if (is_hugepd(__hugepd(pud_val(*pud))))
			err = walk_hugepd_range((hugepd_t *)pud, addr, next, walk, PUD_SHIFT);
		else
			err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}

static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(1);

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}
		if (ops->p4d_entry) {
			err = ops->p4d_entry(p4d, addr, next, walk);
			if (err)
				break;
		}
		if (is_hugepd(__hugepd(p4d_val(*p4d))))
			err = walk_hugepd_range((hugepd_t *)p4d, addr, next, walk, P4D_SHIFT);
		else if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_pud_range(p4d, addr, next, walk);
		if (err)
			break;
	} while (p4d++, addr = next, addr != end);

	return err;
}

static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	if (walk->pgd)
		pgd = walk->pgd + pgd_index(addr);
	else
		pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, 0, walk);
			if (err)
				break;
			continue;
		}
		if (ops->pgd_entry) {
			err = ops->pgd_entry(pgd, addr, next, walk);
			if (err)
				break;
		}
		if (is_hugepd(__hugepd(pgd_val(*pgd))))
			err = walk_hugepd_range((hugepd_t *)pgd, addr, next, walk, PGDIR_SHIFT);
		else if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	hugetlb_vma_lock_read(vma);
	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = hugetlb_walk(vma, addr & hmask, sz);
		if (pte)
			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (ops->pte_hole)
			err = ops->pte_hole(addr, next, -1, walk);
		if (err)
			break;
	} while (addr = next, addr != end);
	hugetlb_vma_unlock_read(vma);

	return err;
}

#else /* CONFIG_HUGETLB_PAGE */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */

/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it via the returned value. Return 0 if we do walk over the
 * current vma, and return 1 if we skip the vma. A negative return value
 * means an error, in which case we abort the current walk.
 */
static int walk_page_test(unsigned long start, unsigned long end,
			struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->test_walk)
		return ops->test_walk(start, end, walk);

	/*
	 * vma(VM_PFNMAP) doesn't have any valid struct pages behind the
	 * VM_PFNMAP range, so we don't walk over it as we do for normal vmas.
	 * However, some callers are interested in handling hole ranges and
	 * don't want to just ignore any single address range. Such users
	 * certainly define their ->pte_hole() callbacks, so let's delegate
	 * them to handle vma(VM_PFNMAP).
	 */
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;
		if (ops->pte_hole)
			err = ops->pte_hole(start, end, -1, walk);
		return err ? err : 1;
	}
	return 0;
}

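/*
 * Illustrative sketch (not part of this file): a minimal ->test_walk()
 * callback that restricts a walk to anonymous VMAs. Returning 1 skips the
 * vma, 0 walks it, and a negative value aborts the walk, matching the
 * semantics of walk_page_test() above. The name only_anon_test_walk is
 * hypothetical and exists purely for illustration.
 */
#if 0
static int only_anon_test_walk(unsigned long start, unsigned long end,
			       struct mm_walk *walk)
{
	/* Skip file-backed and special mappings, walk anonymous ones. */
	return vma_is_anonymous(walk->vma) ? 0 : 1;
}
#endif
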
static int __walk_page_range(unsigned long start, unsigned long end,
			struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->pre_vma) {
		err = ops->pre_vma(start, end, walk);
		if (err)
			return err;
	}

	if (is_vm_hugetlb_page(vma)) {
		if (ops->hugetlb_entry)
			err = walk_hugetlb_range(start, end, walk);
	} else
		err = walk_pgd_range(start, end, walk);

	if (ops->post_vma)
		ops->post_vma(walk);

	return err;
}

static inline void process_mm_walk_lock(struct mm_struct *mm,
					enum page_walk_lock walk_lock)
{
	if (walk_lock == PGWALK_RDLOCK)
		mmap_assert_locked(mm);
	else
		mmap_assert_write_locked(mm);
}

static inline void process_vma_walk_lock(struct vm_area_struct *vma,
					 enum page_walk_lock walk_lock)
{
#ifdef CONFIG_PER_VMA_LOCK
	switch (walk_lock) {
	case PGWALK_WRLOCK:
		vma_start_write(vma);
		break;
	case PGWALK_WRLOCK_VERIFY:
		vma_assert_write_locked(vma);
		break;
	case PGWALK_RDLOCK:
		/* PGWALK_RDLOCK is handled by process_mm_walk_lock */
		break;
	}
#endif
}

/**
 * walk_page_range - walk page table with caller specific callbacks
 * @mm:		mm_struct representing the target process of page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * Recursively walk the page table tree of the process represented by @mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific work for each entry, by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up some of these
 * callbacks, the associated entries/pages are just ignored.
 * The return values of these callbacks are commonly defined like below:
 *
 *  - 0  : succeeded to handle the current entry, and if you don't reach the
 *         end address yet, continue to walk.
 *  - >0 : succeeded to handle the current entry, and return to the caller
 *         with caller specific value.
 *  - <0 : failed to handle the current entry, and return to the caller
 *         with error code.
 *
 * Before starting to walk page tables, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
 * purpose.
 *
 * If operations need to be staged before and committed after a vma is walked,
 * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
 * since it is intended to handle commit-type operations, can't return any
 * errors.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for the access from callbacks. If you want to pass some
 * caller-specific data to callbacks, @private should be helpful.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
 *   because these functions traverse the vma list and/or access the vma's data.
 */
int walk_page_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
		void *private)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.private	= private,
	};

	if (start >= end)
		return -EINVAL;

	if (!walk.mm)
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);

	vma = find_vma(walk.mm, start);
	do {
		if (!vma) { /* after the last vma */
			walk.vma = NULL;
			next = end;
			if (ops->pte_hole)
				err = ops->pte_hole(start, next, -1, &walk);
		} else if (start < vma->vm_start) { /* outside vma */
			walk.vma = NULL;
			next = min(end, vma->vm_start);
			if (ops->pte_hole)
				err = ops->pte_hole(start, next, -1, &walk);
		} else { /* inside vma */
			process_vma_walk_lock(vma, ops->walk_lock);
			walk.vma = vma;
			next = min(end, vma->vm_end);
			vma = find_vma(mm, vma->vm_end);

			err = walk_page_test(start, next, &walk);
			if (err > 0) {
				/*
				 * positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */
				err = 0;
				continue;
			}
			if (err < 0)
				break;
			err = __walk_page_range(start, next, &walk);
		}
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}
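
/*
 * Illustrative sketch (not part of this file): a minimal walk_page_range()
 * user that counts present PTEs in a range of a process' address space.
 * The names count_pte_entry, count_present_ops and count_present_ptes are
 * hypothetical and exist only to demonstrate the callback protocol and the
 * PGWALK_RDLOCK locking rule documented above.
 */
#if 0
static int count_pte_entry(pte_t *pte, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	unsigned long *nr_present = walk->private;

	if (pte_present(ptep_get(pte)))
		(*nr_present)++;
	return 0;		/* 0 means: keep walking */
}

static const struct mm_walk_ops count_present_ops = {
	.pte_entry	= count_pte_entry,
	.walk_lock	= PGWALK_RDLOCK,
};

/* The caller must hold mmap_read_lock(mm), as asserted for PGWALK_RDLOCK. */
static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	unsigned long nr_present = 0;

	walk_page_range(mm, start, end, &count_present_ops, &nr_present);
	return nr_present;
}
#endif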

/**
 * walk_page_range_novma - walk a range of pagetables not backed by a vma
 * @mm:		mm_struct representing the target process of page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operation to call during the walk
 * @pgd:	pgd to walk if different from mm->pgd
 * @private:	private data for callbacks' usage
 *
 * Similar to walk_page_range() but can walk any page tables even if they are
 * not backed by VMAs. Because 'unusual' entries may be walked this function
 * will also not lock the PTEs for the pte_entry() callback. This is useful for
 * walking kernel page tables or page tables for firmware.
 *
 * Note: Be careful when walking kernel page tables. The caller may need to
 * take additional measures (the mmap lock may be insufficient) to prevent the
 * intermediate kernel page tables belonging to the specified address range
 * from being freed (e.g. by memory hot-remove).
 */
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
			  unsigned long end, const struct mm_walk_ops *ops,
			  pgd_t *pgd,
			  void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.pgd		= pgd,
		.private	= private,
		.no_vma		= true
	};

	if (start >= end || !walk.mm)
		return -EINVAL;

	/*
	 * 1) For walking the user virtual address space:
	 *
	 * The mmap lock protects the page walker from changes to the page
	 * tables during the walk.  However a read lock is insufficient to
	 * protect those areas which don't have a VMA as munmap() detaches
	 * the VMAs before downgrading to a read lock and actually tearing
	 * down PTEs/page tables. In that case, the mmap write lock must
	 * be held.
	 *
	 * 2) For walking the kernel virtual address space:
	 *
	 * The kernel's intermediate page tables are usually not freed, so
	 * the mmap read lock is sufficient. But there are some exceptions,
	 * e.g. memory hot-remove, in which case the mmap lock is insufficient
	 * to prevent the intermediate kernel page tables belonging to the
	 * specified address range from being freed. The caller must take
	 * other actions to prevent this race.
	 */
	if (mm == &init_mm)
		mmap_assert_locked(walk.mm);
	else
		mmap_assert_write_locked(walk.mm);

	return walk_pgd_range(start, end, &walk);
}
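
/*
 * Illustrative sketch (not part of this file): probing a kernel virtual
 * address range with walk_page_range_novma(), in the spirit of the ptdump
 * users. The callback and the kernel_range_has_ptes() wrapper are
 * hypothetical; only the walk_page_range_novma() call reflects the API
 * defined above. Per the comment above, the mmap read lock of init_mm is
 * assumed to be sufficient for the range being walked.
 */
#if 0
static int novma_pte_entry(pte_t *pte, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	bool *found = walk->private;

	if (!pte_none(ptep_get(pte))) {
		*found = true;
		return 1;	/* a positive value stops the walk early */
	}
	return 0;
}

static const struct mm_walk_ops novma_ops = {
	.pte_entry	= novma_pte_entry,
};

static bool kernel_range_has_ptes(unsigned long start, unsigned long end)
{
	bool found = false;

	mmap_read_lock(&init_mm);
	walk_page_range_novma(&init_mm, start, end, &novma_ops,
			      NULL /* use init_mm's pgd */, &found);
	mmap_read_unlock(&init_mm);
	return found;
}
#endif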

int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
			unsigned long end, const struct mm_walk_ops *ops,
			void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= vma->vm_mm,
		.vma		= vma,
		.private	= private,
	};

	if (start >= end || !walk.mm)
		return -EINVAL;
	if (start < vma->vm_start || end > vma->vm_end)
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);
	process_vma_walk_lock(vma, ops->walk_lock);
	return __walk_page_range(start, end, &walk);
}

int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= vma->vm_mm,
		.vma		= vma,
		.private	= private,
	};

	if (!walk.mm)
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);
	process_vma_walk_lock(vma, ops->walk_lock);
	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}

/**
 * walk_page_mapping - walk all memory areas mapped into a struct address_space.
 * @mapping: Pointer to the struct address_space
 * @first_index: First page offset in the address_space
 * @nr: Number of incremental page offsets to cover
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * This function walks all memory areas mapped into a struct address_space.
 * The walk is limited to only the given page-size index range, but if
 * the index boundaries cross a huge page-table entry, that entry will be
 * included.
 *
 * Also see walk_page_range() for additional information.
 *
 * Locking:
 *   This function can't require that the struct mm_struct::mmap_lock is held,
 *   since @mapping may be mapped by multiple processes. Instead
 *   @mapping->i_mmap_rwsem must be held. This might have implications in the
 *   callbacks, and it's up to the caller to ensure that the
 *   struct mm_struct::mmap_lock is not needed.
 *
 *   Also this means that a caller can't rely on the struct
 *   vm_area_struct::vm_flags to be constant across a call,
 *   except for immutable flags. Callers requiring this shouldn't use
 *   this function.
 *
 * Return: 0 on success, negative error code on failure, positive number on
 * caller defined premature termination.
 */
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
		      pgoff_t nr, const struct mm_walk_ops *ops,
		      void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.private	= private,
	};
	struct vm_area_struct *vma;
	pgoff_t vba, vea, cba, cea;
	unsigned long start_addr, end_addr;
	int err = 0;

	lockdep_assert_held(&mapping->i_mmap_rwsem);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
				  first_index + nr - 1) {
		/* Clip to the vma */
		vba = vma->vm_pgoff;
		vea = vba + vma_pages(vma);
		cba = first_index;
		cba = max(cba, vba);
		cea = first_index + nr;
		cea = min(cea, vea);

		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
		if (start_addr >= end_addr)
			continue;

		walk.vma = vma;
		walk.mm = vma->vm_mm;

		err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
		if (err > 0) {
			err = 0;
			break;
		} else if (err < 0)
			break;

		err = __walk_page_range(start_addr, end_addr, &walk);
		if (err)
			break;
	}

	return err;
}
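
/*
 * Illustrative sketch (not part of this file): counting present PTEs that
 * map a file range across every process, using walk_page_mapping() under
 * the i_mmap_rwsem read lock as required above. The callback and the
 * count_file_range_mappings() wrapper are hypothetical; see
 * mm/mapping_dirty_helpers.c for a real in-tree user of this interface.
 */
#if 0
static int mapping_pte_entry(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	unsigned long *nr_mapped = walk->private;

	if (pte_present(ptep_get(pte)))
		(*nr_mapped)++;
	return 0;
}

static const struct mm_walk_ops mapping_count_ops = {
	.pte_entry	= mapping_pte_entry,
};

static unsigned long count_file_range_mappings(struct address_space *mapping,
					       pgoff_t first_index, pgoff_t nr)
{
	unsigned long nr_mapped = 0;

	i_mmap_lock_read(mapping);
	walk_page_mapping(mapping, first_index, nr, &mapping_count_ops,
			  &nr_mapped);
	i_mmap_unlock_read(mapping);
	return nr_mapped;
}
#endif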