// SPDX-License-Identifier: GPL-2.0
/* xref: /linux/mm/pagewalk.c (revision 3afc423632a194d7d6afef34e4bb98f804cd071d) */
#include <linux/pagewalk.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

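/*
 * Walk the ptes covered by one pmd entry, calling ->pte_entry() for each
 * pte in [addr, end).  The pte page table is mapped and locked for the
 * whole loop via pte_offset_map_lock().
 */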
static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;
	const struct mm_walk_ops *ops = walk->ops;
	spinlock_t *ptl;

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	for (;;) {
		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		addr += PAGE_SIZE;
		if (addr == end)
			break;
		pte++;
	}

	pte_unmap_unlock(pte, ptl);
	return err;
}

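/*
 * Walk the pmd entries covered by one pud entry.  Holes (and ranges with no
 * vma) are reported via ->pte_hole(); populated entries go to ->pmd_entry()
 * and, unless the handler asks otherwise through walk->action, transparent
 * huge pmds are split so that walk_pte_range() can visit individual ptes.
 */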
static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd) || !walk->vma) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		/*
		 * This implies that each ->pmd_entry() handler
		 * needs to know about pmd_trans_huge() pmds
		 */
		if (ops->pmd_entry)
			err = ops->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		/*
		 * Check this here so we only break down trans_huge
		 * pages when we _need_ to
		 */
		if (walk->action == ACTION_CONTINUE ||
		    !(ops->pte_entry))
			continue;

		split_huge_pmd(walk->vma, pmd, addr);
		if (pmd_trans_unstable(pmd))
			goto again;

		err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);

	return err;
}

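/*
 * Walk the pud entries covered by one p4d entry: holes go to ->pte_hole(),
 * populated entries go to ->pud_entry() and, depending on walk->action and
 * the installed callbacks, huge puds are split before descending into
 * walk_pmd_range().
 */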
static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	pud = pud_offset(p4d, addr);
	do {
 again:
		next = pud_addr_end(addr, end);
		if (pud_none(*pud) || !walk->vma) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		if (ops->pud_entry)
			err = ops->pud_entry(pud, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		if (walk->action == ACTION_CONTINUE ||
		    !(ops->pmd_entry || ops->pte_entry))
			continue;

		split_huge_pud(walk->vma, pud, addr);
		if (pud_none(*pud))
			goto again;

		err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}

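/*
 * Walk the p4d entries covered by one pgd entry, reporting holes via
 * ->pte_hole(), calling ->p4d_entry() where set and descending into
 * walk_pud_range() when any lower-level callback is installed.
 */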
static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (ops->p4d_entry) {
			err = ops->p4d_entry(p4d, addr, next, walk);
			if (err)
				break;
		}
		if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_pud_range(p4d, addr, next, walk);
		if (err)
			break;
	} while (p4d++, addr = next, addr != end);

	return err;
}

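/*
 * Top level of the walk: iterate over the pgd entries of walk->mm in
 * [addr, end), calling ->pgd_entry() where set and descending into
 * walk_p4d_range() when any lower-level callback is installed.
 */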
static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (ops->pgd_entry) {
			err = ops->pgd_entry(pgd, addr, next, walk);
			if (err)
				break;
		}
		if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
		    ops->pte_entry)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
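/* Advance to the end of the current huge page, clamped to @end. */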
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

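/*
 * Hugetlb vmas are walked one huge page at a time: present huge ptes are
 * passed to ->hugetlb_entry(), missing ones to ->pte_hole().
 */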
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask, sz);

		if (pte)
			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (ops->pte_hole)
			err = ops->pte_hole(addr, next, walk);

		if (err)
			break;
	} while (addr = next, addr != end);

	return err;
}

#else /* CONFIG_HUGETLB_PAGE */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */

/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it via the returned value. Return 0 if we do walk over the
 * current vma, and return 1 if we skip the vma. A negative value means
 * an error, in which case we abort the current walk.
 */
static int walk_page_test(unsigned long start, unsigned long end,
			struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->test_walk)
		return ops->test_walk(start, end, walk);

	/*
	 * A VM_PFNMAP vma has no valid struct pages behind its range, so we
	 * don't walk over it as we do for normal vmas. However, some callers
	 * are interested in handling holes and don't want any address range
	 * to be silently skipped. Such users define a ->pte_hole() callback,
	 * so delegate VM_PFNMAP vmas to it.
	 */
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;
		if (ops->pte_hole)
			err = ops->pte_hole(start, end, walk);
		return err ? err : 1;
	}
	return 0;
}

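/*
 * Walk one (start, end) chunk that lies within a single vma (or within a
 * hole when walk->vma is NULL), bracketing the walk with ->pre_vma() and
 * ->post_vma() where provided and dispatching hugetlb vmas to
 * walk_hugetlb_range() when ->hugetlb_entry() is set.
 */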
static int __walk_page_range(unsigned long start, unsigned long end,
			struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (vma && ops->pre_vma) {
		err = ops->pre_vma(start, end, walk);
		if (err)
			return err;
	}

	if (vma && is_vm_hugetlb_page(vma)) {
		if (ops->hugetlb_entry)
			err = walk_hugetlb_range(start, end, walk);
	} else
		err = walk_pgd_range(start, end, walk);

	if (vma && ops->post_vma)
		ops->post_vma(walk);

	return err;
}

/**
 * walk_page_range - walk page table with caller specific callbacks
 * @mm:		mm_struct representing the target process of the page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operations to call during the walk
 * @private:	private data for callbacks' usage
 *
 * Recursively walk the page table tree of the process represented by @mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific work for each entry, by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up one of these
 * callbacks, the associated entries/pages are just ignored.
 * The return values of these callbacks are commonly defined as follows:
 *
 *  - 0  : the current entry was handled; if the end address has not been
 *         reached yet, continue the walk.
 *  - >0 : the current entry was handled, and the walk returns to the caller
 *         with this caller-specific value.
 *  - <0 : handling the current entry failed, and the walk returns to the
 *         caller with this error code.
 *
 * Before starting to walk the page table, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
 * purpose.
 *
 * If operations need to be staged before and committed after a vma is walked,
 * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
 * since it is intended to handle commit-type operations, can't return any
 * errors.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for access from the callbacks. If you want to pass some
 * caller-specific data to the callbacks, @private should be helpful.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold
 *   @mm->mmap_sem, because these functions traverse the vma list and/or
 *   access the vma's data.
 */
int walk_page_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
		void *private)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.private	= private,
	};

	if (start >= end)
		return -EINVAL;

	if (!walk.mm)
		return -EINVAL;

	lockdep_assert_held(&walk.mm->mmap_sem);

	vma = find_vma(walk.mm, start);
	do {
		if (!vma) { /* after the last vma */
			walk.vma = NULL;
			next = end;
		} else if (start < vma->vm_start) { /* outside vma */
			walk.vma = NULL;
			next = min(end, vma->vm_start);
		} else { /* inside vma */
			walk.vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;

			err = walk_page_test(start, next, &walk);
			if (err > 0) {
				/*
				 * positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */
				err = 0;
				continue;
			}
			if (err < 0)
				break;
		}
		if (walk.vma || walk.ops->pte_hole)
			err = __walk_page_range(start, next, &walk);
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}

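/*
 * Illustrative sketch (not part of the original file): a minimal user of
 * walk_page_range().  The names below (count_present_ptes(),
 * skip_file_backed(), count_pte_entry(), count_ops and the
 * PAGEWALK_USAGE_EXAMPLE guard) are hypothetical and exist only to show how
 * ->test_walk(), ->pte_entry() and the @private pointer fit together; the
 * caller is assumed to be able to take mmap_sem for read.
 */
#ifdef PAGEWALK_USAGE_EXAMPLE
/* Skip file-backed vmas; a positive return value means "skip this vma". */
static int skip_file_backed(unsigned long start, unsigned long end,
			    struct mm_walk *walk)
{
	return walk->vma->vm_file ? 1 : 0;
}

/* Count every present pte; returning 0 keeps the walk going. */
static int count_pte_entry(pte_t *pte, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (pte_present(*pte))
		(*count)++;
	return 0;
}

static const struct mm_walk_ops count_ops = {
	.test_walk	= skip_file_backed,
	.pte_entry	= count_pte_entry,
};

/* Count the present ptes of anonymous vmas in [start, end) of @mm. */
static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	unsigned long count = 0;

	down_read(&mm->mmap_sem);
	walk_page_range(mm, start, end, &count_ops, &count);
	up_read(&mm->mmap_sem);
	return count;
}
#endif /* PAGEWALK_USAGE_EXAMPLE */

/*
 * walk_page_vma - walk the page tables of a single vma with the @ops
 * callbacks.  ops->test_walk() is honoured just as in walk_page_range(),
 * and the caller must hold @vma->vm_mm->mmap_sem.
 */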
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= vma->vm_mm,
		.vma		= vma,
		.private	= private,
	};
	int err;

	if (!walk.mm)
		return -EINVAL;

	lockdep_assert_held(&walk.mm->mmap_sem);

	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
	if (err > 0)
		return 0;
	if (err < 0)
		return err;
	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}

/**
 * walk_page_mapping - walk all memory areas mapped into a struct address_space.
 * @mapping:	Pointer to the struct address_space
 * @first_index: First page offset in the address_space
 * @nr:		Number of incremental page offsets to cover
 * @ops:	operations to call during the walk
 * @private:	private data for callbacks' usage
 *
 * This function walks all memory areas mapped into a struct address_space.
 * The walk is limited to only the given page-size index range, but if
 * the index boundaries cross a huge page-table entry, that entry will be
 * included.
 *
 * Also see walk_page_range() for additional information.
 *
 * Locking:
 *   This function can't require that the struct mm_struct::mmap_sem is held,
 *   since @mapping may be mapped by multiple processes. Instead
 *   @mapping->i_mmap_rwsem must be held. This might have implications in the
 *   callbacks, and it's up to the caller to ensure that the
 *   struct mm_struct::mmap_sem is not needed.
 *
 *   Also this means that a caller can't rely on the struct
 *   vm_area_struct::vm_flags to be constant across a call,
 *   except for immutable flags. Callers requiring this shouldn't use
 *   this function.
 *
 * Return: 0 on success, negative error code on failure, positive number on
 * caller defined premature termination.
 */
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
		      pgoff_t nr, const struct mm_walk_ops *ops,
		      void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.private	= private,
	};
	struct vm_area_struct *vma;
	pgoff_t vba, vea, cba, cea;
	unsigned long start_addr, end_addr;
	int err = 0;

	lockdep_assert_held(&mapping->i_mmap_rwsem);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
				  first_index + nr - 1) {
		/* Clip to the vma */
		vba = vma->vm_pgoff;
		vea = vba + vma_pages(vma);
		cba = first_index;
		cba = max(cba, vba);
		cea = first_index + nr;
		cea = min(cea, vea);

		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
		if (start_addr >= end_addr)
			continue;

		walk.vma = vma;
		walk.mm = vma->vm_mm;

		err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
		if (err > 0) {
			err = 0;
			break;
		} else if (err < 0)
			break;

		err = __walk_page_range(start_addr, end_addr, &walk);
		if (err)
			break;
	}

	return err;
}
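
/*
 * Illustrative sketch (hypothetical names again): counting, across every
 * process that maps @mapping, the ptes that currently map the page-cache
 * range [@first_index, @first_index + @nr).  mapping_range_ptes() and
 * mapping_count_pte() are invented for this example; the point is the
 * locking pattern, i.e. taking i_mmap_rwsem for read around
 * walk_page_mapping() instead of any single mm's mmap_sem.
 */
#ifdef PAGEWALK_USAGE_EXAMPLE
static int mapping_count_pte(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (pte_present(*pte))
		(*count)++;
	return 0;
}

static const struct mm_walk_ops mapping_count_ops = {
	.pte_entry	= mapping_count_pte,
};

static unsigned long mapping_range_ptes(struct address_space *mapping,
					pgoff_t first_index, pgoff_t nr)
{
	unsigned long count = 0;

	i_mmap_lock_read(mapping);
	walk_page_mapping(mapping, first_index, nr, &mapping_count_ops, &count);
	i_mmap_unlock_read(mapping);
	return count;
}
#endif /* PAGEWALK_USAGE_EXAMPLE */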