xref: /linux/mm/damon/vaddr.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * DAMON Code for Virtual Address Spaces
4  *
5  * Author: SeongJae Park <sj@kernel.org>
6  */
7 
8 #define pr_fmt(fmt) "damon-va: " fmt
9 
10 #include <linux/highmem.h>
11 #include <linux/hugetlb.h>
12 #include <linux/mman.h>
13 #include <linux/mmu_notifier.h>
14 #include <linux/page_idle.h>
15 #include <linux/pagewalk.h>
16 #include <linux/sched/mm.h>
17 
18 #include "../internal.h"
19 #include "ops-common.h"
20 
21 #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
22 #undef DAMON_MIN_REGION_SZ
23 #define DAMON_MIN_REGION_SZ 1
24 #endif
25 
26 /*
27  * 't->pid' should be the pointer to the relevant 'struct pid' having reference
28  * count.  Caller must put the returned task, unless it is NULL.
29  */
30 static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
31 {
32 	return get_pid_task(t->pid, PIDTYPE_PID);
33 }
34 
35 /*
36  * Get the mm_struct of the given target
37  *
38  * Caller _must_ put the mm_struct after use, unless it is NULL.
39  *
40  * Returns the mm_struct of the target on success, NULL on failure
41  */
42 static struct mm_struct *damon_get_mm(struct damon_target *t)
43 {
44 	struct task_struct *task;
45 	struct mm_struct *mm;
46 
47 	task = damon_get_task_struct(t);
48 	if (!task)
49 		return NULL;
50 
51 	mm = get_task_mm(task);
52 	put_task_struct(task);
53 	return mm;
54 }
55 
56 static unsigned long sz_range(struct damon_addr_range *r)
57 {
58 	return r->end - r->start;
59 }
60 
61 /*
62  * Find three regions separated by two biggest unmapped regions
63  *
64  * vma		the head vma of the target address space
65  * regions	an array of three address ranges that results will be saved
66  *
67  * This function receives an address space and finds three regions in it which
68  * separated by the two biggest unmapped regions in the space.  Please refer to
69  * below comments of '__damon_va_init_regions()' function to know why this is
70  * necessary.
71  *
72  * Returns 0 if success, or negative error code otherwise.
73  */
74 static int __damon_va_three_regions(struct mm_struct *mm,
75 				       struct damon_addr_range regions[3])
76 {
77 	struct damon_addr_range first_gap = {0}, second_gap = {0};
78 	VMA_ITERATOR(vmi, mm, 0);
79 	struct vm_area_struct *vma, *prev = NULL;
80 	unsigned long start;
81 
82 	/*
83 	 * Find the two biggest gaps so that first_gap > second_gap > others.
84 	 * If this is too slow, it can be optimised to examine the maple
85 	 * tree gaps.
86 	 */
87 	rcu_read_lock();
88 	for_each_vma(vmi, vma) {
89 		unsigned long gap;
90 
91 		if (!prev) {
92 			start = vma->vm_start;
93 			goto next;
94 		}
95 		gap = vma->vm_start - prev->vm_end;
96 
97 		if (gap > sz_range(&first_gap)) {
98 			second_gap = first_gap;
99 			first_gap.start = prev->vm_end;
100 			first_gap.end = vma->vm_start;
101 		} else if (gap > sz_range(&second_gap)) {
102 			second_gap.start = prev->vm_end;
103 			second_gap.end = vma->vm_start;
104 		}
105 next:
106 		prev = vma;
107 	}
108 	rcu_read_unlock();
109 
110 	if (!sz_range(&second_gap) || !sz_range(&first_gap))
111 		return -EINVAL;
112 
113 	/* Sort the two biggest gaps by address */
114 	if (first_gap.start > second_gap.start)
115 		swap(first_gap, second_gap);
116 
117 	/* Store the result */
118 	regions[0].start = ALIGN(start, DAMON_MIN_REGION_SZ);
119 	regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION_SZ);
120 	regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION_SZ);
121 	regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION_SZ);
122 	regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION_SZ);
123 	regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION_SZ);
124 
125 	return 0;
126 }
127 
128 /*
129  * Get the three regions in the given target (task)
130  *
131  * Returns 0 on success, negative error code otherwise.
132  */
133 static int damon_va_three_regions(struct damon_target *t,
134 				struct damon_addr_range regions[3])
135 {
136 	struct mm_struct *mm;
137 	int rc;
138 
139 	mm = damon_get_mm(t);
140 	if (!mm)
141 		return -EINVAL;
142 
143 	mmap_read_lock(mm);
144 	rc = __damon_va_three_regions(mm, regions);
145 	mmap_read_unlock(mm);
146 
147 	mmput(mm);
148 	return rc;
149 }
150 
151 /*
152  * Initialize the monitoring target regions for the given target (task)
153  *
154  * t	the given target
155  *
156  * Because only a number of small portions of the entire address space
157  * is actually mapped to the memory and accessed, monitoring the unmapped
158  * regions is wasteful.  That said, because we can deal with small noises,
159  * tracking every mapping is not strictly required but could even incur a high
160  * overhead if the mapping frequently changes or the number of mappings is
161  * high.  The adaptive regions adjustment mechanism will further help to deal
162  * with the noise by simply identifying the unmapped areas as a region that
163  * has no access.  Moreover, applying the real mappings that would have many
164  * unmapped areas inside will make the adaptive mechanism quite complex.  That
165  * said, too huge unmapped areas inside the monitoring target should be removed
166  * to not take the time for the adaptive mechanism.
167  *
168  * For the reason, we convert the complex mappings to three distinct regions
169  * that cover every mapped area of the address space.  Also the two gaps
170  * between the three regions are the two biggest unmapped areas in the given
171  * address space.  In detail, this function first identifies the start and the
172  * end of the mappings and the two biggest unmapped areas of the address space.
173  * Then, it constructs the three regions as below:
174  *
175  *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
176  *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
177  *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
178  *
179  * As usual memory map of processes is as below, the gap between the heap and
180  * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed
181  * region and the stack will be two biggest unmapped regions.  Because these
182  * gaps are exceptionally huge areas in usual address space, excluding these
183  * two biggest unmapped regions will be sufficient to make a trade-off.
184  *
185  *   <heap>
186  *   <BIG UNMAPPED REGION 1>
187  *   <uppermost mmap()-ed region>
188  *   (other mmap()-ed regions and small unmapped regions)
189  *   <lowermost mmap()-ed region>
190  *   <BIG UNMAPPED REGION 2>
191  *   <stack>
192  */
193 static void __damon_va_init_regions(struct damon_ctx *ctx,
194 				     struct damon_target *t)
195 {
196 	struct damon_target *ti;
197 	struct damon_addr_range regions[3];
198 	int tidx = 0;
199 
200 	if (damon_va_three_regions(t, regions)) {
201 		damon_for_each_target(ti, ctx) {
202 			if (ti == t)
203 				break;
204 			tidx++;
205 		}
206 		pr_debug("Failed to get three regions of %dth target\n", tidx);
207 		return;
208 	}
209 
210 	damon_set_regions(t, regions, 3, DAMON_MIN_REGION_SZ);
211 }
212 
213 /* Initialize '->regions_list' of every target (task) */
214 static void damon_va_init(struct damon_ctx *ctx)
215 {
216 	struct damon_target *t;
217 
218 	damon_for_each_target(t, ctx) {
219 		/* the user may set the target regions as they want */
220 		if (!damon_nr_regions(t))
221 			__damon_va_init_regions(ctx, t);
222 	}
223 }
224 
225 /*
226  * Update regions for current memory mappings
227  */
228 static void damon_va_update(struct damon_ctx *ctx)
229 {
230 	struct damon_addr_range three_regions[3];
231 	struct damon_target *t;
232 
233 	damon_for_each_target(t, ctx) {
234 		if (damon_va_three_regions(t, three_regions))
235 			continue;
236 		damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ);
237 	}
238 }
239 
240 static void damon_va_walk_page_range(struct mm_struct *mm, unsigned long start,
241 		unsigned long end, struct mm_walk_ops *ops, void *private)
242 {
243 	struct vm_area_struct *vma;
244 
245 	vma = lock_vma_under_rcu(mm, start);
246 	if (!vma)
247 		goto lock_mmap;
248 
249 	if (end > vma->vm_end) {
250 		vma_end_read(vma);
251 		goto lock_mmap;
252 	}
253 
254 	if (!(vma->vm_flags & VM_PFNMAP)) {
255 		ops->walk_lock = PGWALK_VMA_RDLOCK_VERIFY;
256 		walk_page_range_vma(vma, start, end, ops, private);
257 	}
258 
259 	vma_end_read(vma);
260 	return;
261 
262 lock_mmap:
263 	mmap_read_lock(mm);
264 	ops->walk_lock = PGWALK_RDLOCK;
265 	walk_page_range(mm, start, end, ops, private);
266 	mmap_read_unlock(mm);
267 }
268 
269 static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
270 		unsigned long next, struct mm_walk *walk)
271 {
272 	pte_t *pte;
273 	spinlock_t *ptl;
274 
275 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
276 	if (ptl) {
277 		pmd_t pmde = pmdp_get(pmd);
278 
279 		if (pmd_present(pmde))
280 			damon_pmdp_mkold(pmd, walk->vma, addr);
281 		spin_unlock(ptl);
282 		return 0;
283 	}
284 
285 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
286 	if (!pte)
287 		return 0;
288 	if (!pte_present(ptep_get(pte)))
289 		goto out;
290 	damon_ptep_mkold(pte, walk->vma, addr);
291 out:
292 	pte_unmap_unlock(pte, ptl);
293 	return 0;
294 }
295 
296 #ifdef CONFIG_HUGETLB_PAGE
297 static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
298 				struct vm_area_struct *vma, unsigned long addr)
299 {
300 	bool referenced = false;
301 	pte_t entry = huge_ptep_get(mm, addr, pte);
302 	struct folio *folio = pfn_folio(pte_pfn(entry));
303 	unsigned long psize = huge_page_size(hstate_vma(vma));
304 
305 	folio_get(folio);
306 
307 	if (pte_young(entry)) {
308 		referenced = true;
309 		entry = pte_mkold(entry);
310 		set_huge_pte_at(mm, addr, pte, entry, psize);
311 	}
312 
313 	if (mmu_notifier_clear_young(mm, addr,
314 				     addr + huge_page_size(hstate_vma(vma))))
315 		referenced = true;
316 
317 	if (referenced)
318 		folio_set_young(folio);
319 
320 	folio_set_idle(folio);
321 	folio_put(folio);
322 }
323 
324 static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
325 				     unsigned long addr, unsigned long end,
326 				     struct mm_walk *walk)
327 {
328 	struct hstate *h = hstate_vma(walk->vma);
329 	spinlock_t *ptl;
330 	pte_t entry;
331 
332 	ptl = huge_pte_lock(h, walk->mm, pte);
333 	entry = huge_ptep_get(walk->mm, addr, pte);
334 	if (!pte_present(entry))
335 		goto out;
336 
337 	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
338 
339 out:
340 	spin_unlock(ptl);
341 	return 0;
342 }
343 #else
344 #define damon_mkold_hugetlb_entry NULL
345 #endif /* CONFIG_HUGETLB_PAGE */
346 
347 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
348 {
349 	struct mm_walk_ops damon_mkold_ops = {
350 		.pmd_entry = damon_mkold_pmd_entry,
351 		.hugetlb_entry = damon_mkold_hugetlb_entry,
352 	};
353 
354 	damon_va_walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
355 }
356 
357 /*
358  * Functions for the access checking of the regions
359  */
360 
361 static void __damon_va_prepare_access_check(struct mm_struct *mm,
362 					struct damon_region *r,
363 					struct damon_ctx *ctx)
364 {
365 	r->sampling_addr = damon_rand(ctx, r->ar.start, r->ar.end);
366 
367 	damon_va_mkold(mm, r->sampling_addr);
368 }
369 
370 static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
371 {
372 	struct damon_target *t;
373 	struct mm_struct *mm;
374 	struct damon_region *r;
375 
376 	damon_for_each_target(t, ctx) {
377 		mm = damon_get_mm(t);
378 		if (!mm)
379 			continue;
380 		damon_for_each_region(r, t)
381 			__damon_va_prepare_access_check(mm, r, ctx);
382 		mmput(mm);
383 	}
384 }
385 
386 struct damon_young_walk_private {
387 	/* size of the folio for the access checked virtual memory address */
388 	unsigned long *folio_sz;
389 	bool young;
390 };
391 
392 static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
393 		unsigned long next, struct mm_walk *walk)
394 {
395 	pte_t *pte;
396 	pte_t ptent;
397 	spinlock_t *ptl;
398 	struct folio *folio;
399 	struct damon_young_walk_private *priv = walk->private;
400 
401 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
402 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
403 	if (ptl) {
404 		pmd_t pmde = pmdp_get(pmd);
405 
406 		if (!pmd_present(pmde))
407 			goto huge_out;
408 		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
409 		if (!folio)
410 			goto huge_out;
411 		if (pmd_young(pmde) || !folio_test_idle(folio) ||
412 					mmu_notifier_test_young(walk->mm,
413 						addr))
414 			priv->young = true;
415 		*priv->folio_sz = HPAGE_PMD_SIZE;
416 huge_out:
417 		spin_unlock(ptl);
418 		return 0;
419 	}
420 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
421 
422 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
423 	if (!pte)
424 		return 0;
425 	ptent = ptep_get(pte);
426 	if (!pte_present(ptent))
427 		goto out;
428 	folio = vm_normal_folio(walk->vma, addr, ptent);
429 	if (!folio)
430 		goto out;
431 	if (pte_young(ptent) || !folio_test_idle(folio) ||
432 			mmu_notifier_test_young(walk->mm, addr))
433 		priv->young = true;
434 	*priv->folio_sz = folio_size(folio);
435 out:
436 	pte_unmap_unlock(pte, ptl);
437 	return 0;
438 }
439 
440 #ifdef CONFIG_HUGETLB_PAGE
441 static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
442 				     unsigned long addr, unsigned long end,
443 				     struct mm_walk *walk)
444 {
445 	struct damon_young_walk_private *priv = walk->private;
446 	struct hstate *h = hstate_vma(walk->vma);
447 	struct folio *folio;
448 	spinlock_t *ptl;
449 	pte_t entry;
450 
451 	ptl = huge_pte_lock(h, walk->mm, pte);
452 	entry = huge_ptep_get(walk->mm, addr, pte);
453 	if (!pte_present(entry))
454 		goto out;
455 
456 	folio = pfn_folio(pte_pfn(entry));
457 	folio_get(folio);
458 
459 	if (pte_young(entry) || !folio_test_idle(folio) ||
460 	    mmu_notifier_test_young(walk->mm, addr))
461 		priv->young = true;
462 	*priv->folio_sz = huge_page_size(h);
463 
464 	folio_put(folio);
465 
466 out:
467 	spin_unlock(ptl);
468 	return 0;
469 }
470 #else
471 #define damon_young_hugetlb_entry NULL
472 #endif /* CONFIG_HUGETLB_PAGE */
473 
474 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
475 		unsigned long *folio_sz)
476 {
477 	struct damon_young_walk_private arg = {
478 		.folio_sz = folio_sz,
479 		.young = false,
480 	};
481 
482 	struct mm_walk_ops damon_young_ops = {
483 		.pmd_entry = damon_young_pmd_entry,
484 		.hugetlb_entry = damon_young_hugetlb_entry,
485 	};
486 
487 	damon_va_walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
488 	return arg.young;
489 }
490 
491 /*
492  * Check whether the region was accessed after the last preparation
493  *
494  * mm	'mm_struct' for the given virtual address space
495  * r	the region to be checked
496  */
497 static void __damon_va_check_access(struct mm_struct *mm,
498 				struct damon_region *r, bool same_target,
499 				struct damon_attrs *attrs)
500 {
501 	static unsigned long last_addr;
502 	static unsigned long last_folio_sz = PAGE_SIZE;
503 	static bool last_accessed;
504 
505 	if (!mm) {
506 		damon_update_region_access_rate(r, false, attrs);
507 		return;
508 	}
509 
510 	/* If the region is in the last checked page, reuse the result */
511 	if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
512 				ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
513 		damon_update_region_access_rate(r, last_accessed, attrs);
514 		return;
515 	}
516 
517 	last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
518 	damon_update_region_access_rate(r, last_accessed, attrs);
519 
520 	last_addr = r->sampling_addr;
521 }
522 
523 static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
524 {
525 	struct damon_target *t;
526 	struct mm_struct *mm;
527 	struct damon_region *r;
528 	unsigned int max_nr_accesses = 0;
529 	bool same_target;
530 
531 	damon_for_each_target(t, ctx) {
532 		mm = damon_get_mm(t);
533 		same_target = false;
534 		damon_for_each_region(r, t) {
535 			__damon_va_check_access(mm, r, same_target,
536 					&ctx->attrs);
537 			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
538 			same_target = true;
539 		}
540 		if (mm)
541 			mmput(mm);
542 	}
543 
544 	return max_nr_accesses;
545 }
546 
547 static bool damos_va_filter_young_match(struct damos_filter *filter,
548 		struct folio *folio, struct vm_area_struct *vma,
549 		unsigned long addr, pte_t *ptep, pmd_t *pmdp)
550 {
551 	bool young = false;
552 
553 	if (ptep)
554 		young = pte_young(ptep_get(ptep));
555 	else if (pmdp)
556 		young = pmd_young(pmdp_get(pmdp));
557 
558 	young = young || !folio_test_idle(folio) ||
559 		mmu_notifier_test_young(vma->vm_mm, addr);
560 
561 	if (young && ptep)
562 		damon_ptep_mkold(ptep, vma, addr);
563 	else if (young && pmdp)
564 		damon_pmdp_mkold(pmdp, vma, addr);
565 
566 	return young == filter->matching;
567 }
568 
569 static bool damos_va_filter_out(struct damos *scheme, struct folio *folio,
570 		struct vm_area_struct *vma, unsigned long addr,
571 		pte_t *ptep, pmd_t *pmdp)
572 {
573 	struct damos_filter *filter;
574 	bool matched;
575 
576 	if (scheme->core_filters_allowed)
577 		return false;
578 
579 	damos_for_each_ops_filter(filter, scheme) {
580 		/*
581 		 * damos_folio_filter_match checks the young filter by doing an
582 		 * rmap on the folio to find its page table. However, being the
583 		 * vaddr scheme, we have direct access to the page tables, so
584 		 * use that instead.
585 		 */
586 		if (filter->type == DAMOS_FILTER_TYPE_YOUNG)
587 			matched = damos_va_filter_young_match(filter, folio,
588 				vma, addr, ptep, pmdp);
589 		else
590 			matched = damos_folio_filter_match(filter, folio);
591 
592 		if (matched)
593 			return !filter->allow;
594 	}
595 	return scheme->ops_filters_default_reject;
596 }
597 
/* Private data of the page walk that collects folios to migrate */
struct damos_va_migrate_private {
	/* array of lists of isolated folios, one list per destination */
	struct list_head *migration_lists;
	/* the scheme the walk is done for; gives the filters and the dests */
	struct damos *scheme;
};
602 
603 /*
604  * Place the given folio in the migration_list corresponding to where the folio
605  * should be migrated.
606  *
607  * The algorithm used here is similar to weighted_interleave_nid()
608  */
609 static void damos_va_migrate_dests_add(struct folio *folio,
610 		struct vm_area_struct *vma, unsigned long addr,
611 		struct damos_migrate_dests *dests,
612 		struct list_head *migration_lists)
613 {
614 	pgoff_t ilx;
615 	int order;
616 	unsigned int target;
617 	unsigned int weight_total = 0;
618 	int i;
619 
620 	/*
621 	 * If dests is empty, there is only one migration list corresponding
622 	 * to s->target_nid.
623 	 */
624 	if (!dests->nr_dests) {
625 		i = 0;
626 		goto isolate;
627 	}
628 
629 	order = folio_order(folio);
630 	ilx = vma->vm_pgoff >> order;
631 	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);
632 
633 	for (i = 0; i < dests->nr_dests; i++)
634 		weight_total += dests->weight_arr[i];
635 
636 	/* If the total weights are somehow 0, don't migrate at all */
637 	if (!weight_total)
638 		return;
639 
640 	target = ilx % weight_total;
641 	for (i = 0; i < dests->nr_dests; i++) {
642 		if (target < dests->weight_arr[i])
643 			break;
644 		target -= dests->weight_arr[i];
645 	}
646 
647 	/* If the folio is already in the right node, don't do anything */
648 	if (folio_nid(folio) == dests->node_id_arr[i])
649 		return;
650 
651 isolate:
652 	if (!folio_isolate_lru(folio))
653 		return;
654 
655 	list_add(&folio->lru, &migration_lists[i]);
656 }
657 
658 static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr,
659 		unsigned long next, struct mm_walk *walk)
660 {
661 	struct damos_va_migrate_private *priv = walk->private;
662 	struct list_head *migration_lists = priv->migration_lists;
663 	struct damos *s = priv->scheme;
664 	struct damos_migrate_dests *dests = &s->migrate_dests;
665 	struct folio *folio;
666 	spinlock_t *ptl;
667 	pte_t *start_pte, *pte, ptent;
668 	int nr;
669 
670 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
671 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
672 	if (ptl) {
673 		pmd_t pmde = pmdp_get(pmd);
674 
675 		if (!pmd_present(pmde))
676 			goto huge_out;
677 		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
678 		if (!folio)
679 			goto huge_out;
680 		if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd))
681 			goto huge_out;
682 		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
683 				migration_lists);
684 huge_out:
685 		spin_unlock(ptl);
686 		return 0;
687 	}
688 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
689 
690 	start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
691 	if (!pte)
692 		return 0;
693 
694 	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
695 		nr = 1;
696 		ptent = ptep_get(pte);
697 
698 		if (pte_none(ptent) || !pte_present(ptent))
699 			continue;
700 		folio = vm_normal_folio(walk->vma, addr, ptent);
701 		if (!folio)
702 			continue;
703 		if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL))
704 			continue;
705 		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
706 				migration_lists);
707 		nr = folio_nr_pages(folio);
708 	}
709 	pte_unmap_unlock(start_pte, ptl);
710 	return 0;
711 }
712 
713 /*
714  * Functions for the target validity check and cleanup
715  */
716 
717 static bool damon_va_target_valid(struct damon_target *t)
718 {
719 	struct task_struct *task;
720 
721 	task = damon_get_task_struct(t);
722 	if (task) {
723 		put_task_struct(task);
724 		return true;
725 	}
726 
727 	return false;
728 }
729 
730 static void damon_va_cleanup_target(struct damon_target *t)
731 {
732 	put_pid(t->pid);
733 }
734 
#ifdef CONFIG_ADVISE_SYSCALLS
/*
 * Apply the given madvise behavior to the given region of the given target
 *
 * The start address and the length of the region are page-aligned with
 * PAGE_ALIGN() before being passed to do_madvise().
 *
 * Returns the page-aligned length if do_madvise() succeeded, or 0 if it
 * failed or the mm_struct of the target could not be obtained.
 */
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	unsigned long start = PAGE_ALIGN(r->ar.start);
	unsigned long len = PAGE_ALIGN(damon_sz_region(r));
	unsigned long applied = 0;
	struct mm_struct *mm;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	if (!do_madvise(mm, start, len, behavior))
		applied = len;
	mmput(mm);

	return applied;
}
#else
/* madvise() is not available in this configuration; nothing is applied */
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	return 0;
}
#endif	/* CONFIG_ADVISE_SYSCALLS */
760 
761 static unsigned long damos_va_migrate(struct damon_target *target,
762 		struct damon_region *r, struct damos *s,
763 		unsigned long *sz_filter_passed)
764 {
765 	LIST_HEAD(folio_list);
766 	struct damos_va_migrate_private priv;
767 	struct mm_struct *mm;
768 	int nr_dests;
769 	int nid;
770 	bool use_target_nid;
771 	unsigned long applied = 0;
772 	struct damos_migrate_dests *dests = &s->migrate_dests;
773 	struct mm_walk_ops walk_ops = {
774 		.pmd_entry = damos_va_migrate_pmd_entry,
775 		.pte_entry = NULL,
776 	};
777 
778 	use_target_nid = dests->nr_dests == 0;
779 	nr_dests = use_target_nid ? 1 : dests->nr_dests;
780 	priv.scheme = s;
781 	priv.migration_lists = kmalloc_objs(*priv.migration_lists, nr_dests);
782 	if (!priv.migration_lists)
783 		return 0;
784 
785 	for (int i = 0; i < nr_dests; i++)
786 		INIT_LIST_HEAD(&priv.migration_lists[i]);
787 
788 
789 	mm = damon_get_mm(target);
790 	if (!mm)
791 		goto free_lists;
792 
793 	damon_va_walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
794 	mmput(mm);
795 
796 	for (int i = 0; i < nr_dests; i++) {
797 		nid = use_target_nid ? s->target_nid : dests->node_id_arr[i];
798 		applied += damon_migrate_pages(&priv.migration_lists[i], nid);
799 		cond_resched();
800 	}
801 
802 free_lists:
803 	kfree(priv.migration_lists);
804 	return applied * PAGE_SIZE;
805 }
806 
/* Private data of the page walk for the DAMOS_STAT action */
struct damos_va_stat_private {
	/* the scheme the walk is done for */
	struct damos *scheme;
	/* counter that the sizes of the filter-passed folios are added to */
	unsigned long *sz_filter_passed;
};
811 
812 static inline bool damos_va_invalid_folio(struct folio *folio,
813 		struct damos *s)
814 {
815 	return !folio || folio == s->last_applied;
816 }
817 
818 static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
819 		unsigned long next, struct mm_walk *walk)
820 {
821 	struct damos_va_stat_private *priv = walk->private;
822 	struct damos *s = priv->scheme;
823 	unsigned long *sz_filter_passed = priv->sz_filter_passed;
824 	struct vm_area_struct *vma = walk->vma;
825 	struct folio *folio;
826 	spinlock_t *ptl;
827 	pte_t *start_pte, *pte, ptent;
828 	int nr;
829 
830 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
831 	ptl = pmd_trans_huge_lock(pmd, vma);
832 	if (ptl) {
833 		pmd_t pmde = pmdp_get(pmd);
834 
835 		if (!pmd_present(pmde))
836 			goto huge_unlock;
837 
838 		folio = vm_normal_folio_pmd(vma, addr, pmde);
839 
840 		if (damos_va_invalid_folio(folio, s))
841 			goto huge_unlock;
842 
843 		if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd))
844 			*sz_filter_passed += folio_size(folio);
845 		s->last_applied = folio;
846 
847 huge_unlock:
848 		spin_unlock(ptl);
849 		return 0;
850 	}
851 #endif
852 	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
853 	if (!start_pte)
854 		return 0;
855 
856 	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
857 		nr = 1;
858 		ptent = ptep_get(pte);
859 
860 		if (pte_none(ptent) || !pte_present(ptent))
861 			continue;
862 
863 		folio = vm_normal_folio(vma, addr, ptent);
864 
865 		if (damos_va_invalid_folio(folio, s))
866 			continue;
867 
868 		if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL))
869 			*sz_filter_passed += folio_size(folio);
870 		nr = folio_nr_pages(folio);
871 		s->last_applied = folio;
872 	}
873 	pte_unmap_unlock(start_pte, ptl);
874 	return 0;
875 }
876 
877 static unsigned long damos_va_stat(struct damon_target *target,
878 		struct damon_region *r, struct damos *s,
879 		unsigned long *sz_filter_passed)
880 {
881 	struct damos_va_stat_private priv;
882 	struct mm_struct *mm;
883 	struct mm_walk_ops walk_ops = {
884 		.pmd_entry = damos_va_stat_pmd_entry,
885 	};
886 
887 	priv.scheme = s;
888 	priv.sz_filter_passed = sz_filter_passed;
889 
890 	if (!damos_ops_has_filter(s))
891 		return 0;
892 
893 	mm = damon_get_mm(target);
894 	if (!mm)
895 		return 0;
896 
897 	damon_va_walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
898 	mmput(mm);
899 	return 0;
900 }
901 
902 static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
903 		struct damon_target *t, struct damon_region *r,
904 		struct damos *scheme, unsigned long *sz_filter_passed)
905 {
906 	int madv_action;
907 
908 	switch (scheme->action) {
909 	case DAMOS_WILLNEED:
910 		madv_action = MADV_WILLNEED;
911 		break;
912 	case DAMOS_COLD:
913 		madv_action = MADV_COLD;
914 		break;
915 	case DAMOS_PAGEOUT:
916 		madv_action = MADV_PAGEOUT;
917 		break;
918 	case DAMOS_HUGEPAGE:
919 		madv_action = MADV_HUGEPAGE;
920 		break;
921 	case DAMOS_NOHUGEPAGE:
922 		madv_action = MADV_NOHUGEPAGE;
923 		break;
924 	case DAMOS_COLLAPSE:
925 		madv_action = MADV_COLLAPSE;
926 		break;
927 	case DAMOS_MIGRATE_HOT:
928 	case DAMOS_MIGRATE_COLD:
929 		return damos_va_migrate(t, r, scheme, sz_filter_passed);
930 	case DAMOS_STAT:
931 		return damos_va_stat(t, r, scheme, sz_filter_passed);
932 	default:
933 		/*
934 		 * DAMOS actions that are not yet supported by 'vaddr'.
935 		 */
936 		return 0;
937 	}
938 
939 	return damos_madvise(t, r, madv_action);
940 }
941 
942 static int damon_va_scheme_score(struct damon_ctx *context,
943 		struct damon_region *r, struct damos *scheme)
944 {
945 
946 	switch (scheme->action) {
947 	case DAMOS_PAGEOUT:
948 		return damon_cold_score(context, r, scheme);
949 	case DAMOS_MIGRATE_HOT:
950 		return damon_hot_score(context, r, scheme);
951 	case DAMOS_MIGRATE_COLD:
952 		return damon_cold_score(context, r, scheme);
953 	default:
954 		break;
955 	}
956 
957 	return DAMOS_MAX_SCORE;
958 }
959 
960 static int __init damon_va_initcall(void)
961 {
962 	struct damon_operations ops = {
963 		.id = DAMON_OPS_VADDR,
964 		.init = damon_va_init,
965 		.update = damon_va_update,
966 		.prepare_access_checks = damon_va_prepare_access_checks,
967 		.check_accesses = damon_va_check_accesses,
968 		.target_valid = damon_va_target_valid,
969 		.cleanup_target = damon_va_cleanup_target,
970 		.apply_scheme = damon_va_apply_scheme,
971 		.get_scheme_score = damon_va_scheme_score,
972 	};
973 	/* ops for fixed virtual address ranges */
974 	struct damon_operations ops_fvaddr = ops;
975 	int err;
976 
977 	/* Don't set the monitoring target regions for the entire mapping */
978 	ops_fvaddr.id = DAMON_OPS_FVADDR;
979 	ops_fvaddr.init = NULL;
980 	ops_fvaddr.update = NULL;
981 
982 	err = damon_register_ops(&ops);
983 	if (err)
984 		return err;
985 	return damon_register_ops(&ops_fvaddr);
986 };
987 
988 subsys_initcall(damon_va_initcall);
989 
990 #include "tests/vaddr-kunit.h"
991