xref: /linux/mm/damon/vaddr.c (revision aec2f682d47c54ef434b2d440992626d80b1ebdc)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * DAMON Code for Virtual Address Spaces
4  *
5  * Author: SeongJae Park <sj@kernel.org>
6  */
7 
8 #define pr_fmt(fmt) "damon-va: " fmt
9 
10 #include <linux/highmem.h>
11 #include <linux/hugetlb.h>
12 #include <linux/mman.h>
13 #include <linux/mmu_notifier.h>
14 #include <linux/page_idle.h>
15 #include <linux/pagewalk.h>
16 #include <linux/sched/mm.h>
17 
18 #include "../internal.h"
19 #include "ops-common.h"
20 
21 #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
22 #undef DAMON_MIN_REGION_SZ
23 #define DAMON_MIN_REGION_SZ 1
24 #endif
25 
26 /*
27  * 't->pid' should be a pointer to the relevant 'struct pid', with a reference
28  * held on it.  The caller must put the returned task, unless it is NULL.
29  */
30 static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
31 {
32 	return get_pid_task(t->pid, PIDTYPE_PID);
33 }
34 
35 /*
36  * Get the mm_struct of the given target
37  *
38  * Caller _must_ put the mm_struct after use, unless it is NULL.
39  *
40  * Returns the mm_struct of the target on success, NULL on failure
41  */
42 static struct mm_struct *damon_get_mm(struct damon_target *t)
43 {
44 	struct task_struct *task;
45 	struct mm_struct *mm;
46 
47 	task = damon_get_task_struct(t);
48 	if (!task)
49 		return NULL;
50 
51 	mm = get_task_mm(task);
52 	put_task_struct(task);
53 	return mm;
54 }
55 
56 static unsigned long sz_range(struct damon_addr_range *r)
57 {
58 	return r->end - r->start;
59 }
60 
61 /*
62  * Find three regions separated by the two biggest unmapped regions
63  *
64  * mm		the mm_struct of the target address space
65  * regions	an array of three address ranges in which the results are saved
66  *
67  * This function receives an address space and finds three regions in it that
68  * are separated by the two biggest unmapped regions in the space.  Please
69  * refer to the comments of the '__damon_va_init_regions()' function below to
70  * see why this is necessary.
71  *
72  * Returns 0 on success, or a negative error code otherwise.
73  */
74 static int __damon_va_three_regions(struct mm_struct *mm,
75 				       struct damon_addr_range regions[3])
76 {
77 	struct damon_addr_range first_gap = {0}, second_gap = {0};
78 	VMA_ITERATOR(vmi, mm, 0);
79 	struct vm_area_struct *vma, *prev = NULL;
80 	unsigned long start;
81 
82 	/*
83 	 * Find the two biggest gaps so that first_gap > second_gap > others.
84 	 * If this is too slow, it can be optimised to examine the maple
85 	 * tree gaps.
86 	 */
87 	rcu_read_lock();
88 	for_each_vma(vmi, vma) {
89 		unsigned long gap;
90 
91 		if (!prev) {
92 			start = vma->vm_start;
93 			goto next;
94 		}
95 		gap = vma->vm_start - prev->vm_end;
96 
97 		if (gap > sz_range(&first_gap)) {
98 			second_gap = first_gap;
99 			first_gap.start = prev->vm_end;
100 			first_gap.end = vma->vm_start;
101 		} else if (gap > sz_range(&second_gap)) {
102 			second_gap.start = prev->vm_end;
103 			second_gap.end = vma->vm_start;
104 		}
105 next:
106 		prev = vma;
107 	}
108 	rcu_read_unlock();
109 
110 	if (!sz_range(&second_gap) || !sz_range(&first_gap))
111 		return -EINVAL;
112 
113 	/* Sort the two biggest gaps by address */
114 	if (first_gap.start > second_gap.start)
115 		swap(first_gap, second_gap);
116 
117 	/* Store the result */
118 	regions[0].start = ALIGN(start, DAMON_MIN_REGION_SZ);
119 	regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION_SZ);
120 	regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION_SZ);
121 	regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION_SZ);
122 	regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION_SZ);
123 	regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION_SZ);
124 
125 	return 0;
126 }
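
/*
 * Worked example (illustrative addresses): for mappings at [0x1000, 0x2000),
 * [0x5000, 0x5800), [0x5900, 0x6000) and [0x9000, 0xa000), the two biggest
 * gaps are [0x2000, 0x5000) and [0x6000, 0x9000), so the three regions become
 * [0x1000, 0x2000), [0x5000, 0x6000) and [0x9000, 0xa000) (before alignment
 * to DAMON_MIN_REGION_SZ); the small [0x5800, 0x5900) gap stays inside the
 * middle region.
 */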
127 
128 /*
129  * Get the three regions in the given target (task)
130  *
131  * Returns 0 on success, negative error code otherwise.
132  */
133 static int damon_va_three_regions(struct damon_target *t,
134 				struct damon_addr_range regions[3])
135 {
136 	struct mm_struct *mm;
137 	int rc;
138 
139 	mm = damon_get_mm(t);
140 	if (!mm)
141 		return -EINVAL;
142 
143 	mmap_read_lock(mm);
144 	rc = __damon_va_three_regions(mm, regions);
145 	mmap_read_unlock(mm);
146 
147 	mmput(mm);
148 	return rc;
149 }
150 
151 /*
152  * Initialize the monitoring target regions for the given target (task)
153  *
154  * t	the given target
155  *
156  * Because only a few small portions of the entire address space are actually
157  * mapped to memory and accessed, monitoring the unmapped regions is wasteful.
158  * Because small amounts of noise can be tolerated, tracking every mapping is
159  * not strictly required, and doing so could even incur high overhead if the
160  * mappings change frequently or the number of mappings is high.  The adaptive
161  * regions adjustment mechanism will further help deal with the noise by simply
162  * identifying the unmapped areas as regions that receive no access.  Moreover,
163  * using the real mappings, which can have many unmapped areas inside, would
164  * make the adaptive mechanism quite complex.  Therefore, excessively large
165  * unmapped areas inside the monitoring targets should be removed so that the
166  * adaptive mechanism does not spend time on them.
167  *
168  * For this reason, we convert the complex mappings to three distinct regions
169  * that cover every mapped area of the address space.  The two gaps between the
170  * three regions are the two biggest unmapped areas in the given address space.
171  * In detail, this function first identifies the start and the end of the
172  * mappings and the two biggest unmapped areas of the address space.  Then, it
173  * constructs the three regions as below:
174  *
175  *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
176  *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
177  *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
178  *
179  * As the usual memory map of a process looks as below, the gap between the
180  * heap and the uppermost mmap()-ed region, and the gap between the lowermost
181  * mmap()-ed region and the stack, will be the two biggest unmapped regions.
182  * Because these gaps are exceptionally huge in a usual address space,
183  * excluding just these two biggest unmapped regions is a sufficient trade-off.
184  *
185  *   <heap>
186  *   <BIG UNMAPPED REGION 1>
187  *   <uppermost mmap()-ed region>
188  *   (other mmap()-ed regions and small unmapped regions)
189  *   <lowermost mmap()-ed region>
190  *   <BIG UNMAPPED REGION 2>
191  *   <stack>
192  */
193 static void __damon_va_init_regions(struct damon_ctx *ctx,
194 				     struct damon_target *t)
195 {
196 	struct damon_target *ti;
197 	struct damon_addr_range regions[3];
198 	int tidx = 0;
199 
200 	if (damon_va_three_regions(t, regions)) {
201 		damon_for_each_target(ti, ctx) {
202 			if (ti == t)
203 				break;
204 			tidx++;
205 		}
206 		pr_debug("Failed to get three regions of %dth target\n", tidx);
207 		return;
208 	}
209 
210 	damon_set_regions(t, regions, 3, DAMON_MIN_REGION_SZ);
211 }
212 
213 /* Initialize '->regions_list' of every target (task) */
214 static void damon_va_init(struct damon_ctx *ctx)
215 {
216 	struct damon_target *t;
217 
218 	damon_for_each_target(t, ctx) {
219 		/* the user may set the target regions as they want */
220 		if (!damon_nr_regions(t))
221 			__damon_va_init_regions(ctx, t);
222 	}
223 }
224 
225 /*
226  * Update regions for current memory mappings
227  */
228 static void damon_va_update(struct damon_ctx *ctx)
229 {
230 	struct damon_addr_range three_regions[3];
231 	struct damon_target *t;
232 
233 	damon_for_each_target(t, ctx) {
234 		if (damon_va_three_regions(t, three_regions))
235 			continue;
236 		damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ);
237 	}
238 }
239 
240 static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
241 		unsigned long next, struct mm_walk *walk)
242 {
243 	pte_t *pte;
244 	spinlock_t *ptl;
245 
246 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
247 	if (ptl) {
248 		pmd_t pmde = pmdp_get(pmd);
249 
250 		if (pmd_present(pmde))
251 			damon_pmdp_mkold(pmd, walk->vma, addr);
252 		spin_unlock(ptl);
253 		return 0;
254 	}
255 
256 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
257 	if (!pte)
258 		return 0;
259 	if (!pte_present(ptep_get(pte)))
260 		goto out;
261 	damon_ptep_mkold(pte, walk->vma, addr);
262 out:
263 	pte_unmap_unlock(pte, ptl);
264 	return 0;
265 }
266 
267 #ifdef CONFIG_HUGETLB_PAGE
268 static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
269 				struct vm_area_struct *vma, unsigned long addr)
270 {
271 	bool referenced = false;
272 	pte_t entry = huge_ptep_get(mm, addr, pte);
273 	struct folio *folio = pfn_folio(pte_pfn(entry));
274 	unsigned long psize = huge_page_size(hstate_vma(vma));
275 
276 	folio_get(folio);
277 
278 	if (pte_young(entry)) {
279 		referenced = true;
280 		entry = pte_mkold(entry);
281 		set_huge_pte_at(mm, addr, pte, entry, psize);
282 	}
283 
284 	if (mmu_notifier_clear_young(mm, addr,
285 				     addr + huge_page_size(hstate_vma(vma))))
286 		referenced = true;
287 
288 	if (referenced)
289 		folio_set_young(folio);
290 
291 	folio_set_idle(folio);
292 	folio_put(folio);
293 }
294 
295 static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
296 				     unsigned long addr, unsigned long end,
297 				     struct mm_walk *walk)
298 {
299 	struct hstate *h = hstate_vma(walk->vma);
300 	spinlock_t *ptl;
301 	pte_t entry;
302 
303 	ptl = huge_pte_lock(h, walk->mm, pte);
304 	entry = huge_ptep_get(walk->mm, addr, pte);
305 	if (!pte_present(entry))
306 		goto out;
307 
308 	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
309 
310 out:
311 	spin_unlock(ptl);
312 	return 0;
313 }
314 #else
315 #define damon_mkold_hugetlb_entry NULL
316 #endif /* CONFIG_HUGETLB_PAGE */
317 
318 static const struct mm_walk_ops damon_mkold_ops = {
319 	.pmd_entry = damon_mkold_pmd_entry,
320 	.hugetlb_entry = damon_mkold_hugetlb_entry,
321 	.walk_lock = PGWALK_RDLOCK,
322 };
323 
324 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
325 {
326 	mmap_read_lock(mm);
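	/* Walk only the single page table entry that maps 'addr' */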
327 	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
328 	mmap_read_unlock(mm);
329 }
330 
331 /*
332  * Functions for the access checking of the regions
333  */
334 
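/*
 * Pick a random sampling address in the region and clear the accessed bit of
 * its mapping, so that the next access check can tell whether the address has
 * been accessed since this preparation.
 */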
335 static void __damon_va_prepare_access_check(struct mm_struct *mm,
336 					struct damon_region *r)
337 {
338 	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
339 
340 	damon_va_mkold(mm, r->sampling_addr);
341 }
342 
343 static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
344 {
345 	struct damon_target *t;
346 	struct mm_struct *mm;
347 	struct damon_region *r;
348 
349 	damon_for_each_target(t, ctx) {
350 		mm = damon_get_mm(t);
351 		if (!mm)
352 			continue;
353 		damon_for_each_region(r, t)
354 			__damon_va_prepare_access_check(mm, r);
355 		mmput(mm);
356 	}
357 }
358 
359 struct damon_young_walk_private {
360 	/* size of the folio mapped at the access-checked virtual address */
361 	unsigned long *folio_sz;
362 	bool young;
363 };
364 
365 static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
366 		unsigned long next, struct mm_walk *walk)
367 {
368 	pte_t *pte;
369 	pte_t ptent;
370 	spinlock_t *ptl;
371 	struct folio *folio;
372 	struct damon_young_walk_private *priv = walk->private;
373 
374 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
375 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
376 	if (ptl) {
377 		pmd_t pmde = pmdp_get(pmd);
378 
379 		if (!pmd_present(pmde))
380 			goto huge_out;
381 		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
382 		if (!folio)
383 			goto huge_out;
384 		if (pmd_young(pmde) || !folio_test_idle(folio) ||
385 					mmu_notifier_test_young(walk->mm,
386 						addr))
387 			priv->young = true;
388 		*priv->folio_sz = HPAGE_PMD_SIZE;
389 huge_out:
390 		spin_unlock(ptl);
391 		return 0;
392 	}
393 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
394 
395 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
396 	if (!pte)
397 		return 0;
398 	ptent = ptep_get(pte);
399 	if (!pte_present(ptent))
400 		goto out;
401 	folio = vm_normal_folio(walk->vma, addr, ptent);
402 	if (!folio)
403 		goto out;
404 	if (pte_young(ptent) || !folio_test_idle(folio) ||
405 			mmu_notifier_test_young(walk->mm, addr))
406 		priv->young = true;
407 	*priv->folio_sz = folio_size(folio);
408 out:
409 	pte_unmap_unlock(pte, ptl);
410 	return 0;
411 }
412 
413 #ifdef CONFIG_HUGETLB_PAGE
414 static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
415 				     unsigned long addr, unsigned long end,
416 				     struct mm_walk *walk)
417 {
418 	struct damon_young_walk_private *priv = walk->private;
419 	struct hstate *h = hstate_vma(walk->vma);
420 	struct folio *folio;
421 	spinlock_t *ptl;
422 	pte_t entry;
423 
424 	ptl = huge_pte_lock(h, walk->mm, pte);
425 	entry = huge_ptep_get(walk->mm, addr, pte);
426 	if (!pte_present(entry))
427 		goto out;
428 
429 	folio = pfn_folio(pte_pfn(entry));
430 	folio_get(folio);
431 
432 	if (pte_young(entry) || !folio_test_idle(folio) ||
433 	    mmu_notifier_test_young(walk->mm, addr))
434 		priv->young = true;
435 	*priv->folio_sz = huge_page_size(h);
436 
437 	folio_put(folio);
438 
439 out:
440 	spin_unlock(ptl);
441 	return 0;
442 }
443 #else
444 #define damon_young_hugetlb_entry NULL
445 #endif /* CONFIG_HUGETLB_PAGE */
446 
447 static const struct mm_walk_ops damon_young_ops = {
448 	.pmd_entry = damon_young_pmd_entry,
449 	.hugetlb_entry = damon_young_hugetlb_entry,
450 	.walk_lock = PGWALK_RDLOCK,
451 };
452 
453 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
454 		unsigned long *folio_sz)
455 {
456 	struct damon_young_walk_private arg = {
457 		.folio_sz = folio_sz,
458 		.young = false,
459 	};
460 
461 	mmap_read_lock(mm);
462 	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
463 	mmap_read_unlock(mm);
464 	return arg.young;
465 }
466 
467 /*
468  * Check whether the region was accessed after the last preparation
469  *
470  * mm	'mm_struct' for the given virtual address space
471  * r	the region to be checked
472  */
473 static void __damon_va_check_access(struct mm_struct *mm,
474 				struct damon_region *r, bool same_target,
475 				struct damon_attrs *attrs)
476 {
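	/*
	 * Kept static so that the result for the last checked address can be
	 * reused by the following regions of the same target that fall in the
	 * same folio (see the reuse check below).
	 */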
477 	static unsigned long last_addr;
478 	static unsigned long last_folio_sz = PAGE_SIZE;
479 	static bool last_accessed;
480 
481 	if (!mm) {
482 		damon_update_region_access_rate(r, false, attrs);
483 		return;
484 	}
485 
486 	/* If the region is in the last checked folio, reuse the result */
487 	if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
488 				ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
489 		damon_update_region_access_rate(r, last_accessed, attrs);
490 		return;
491 	}
492 
493 	last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
494 	damon_update_region_access_rate(r, last_accessed, attrs);
495 
496 	last_addr = r->sampling_addr;
497 }
498 
499 static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
500 {
501 	struct damon_target *t;
502 	struct mm_struct *mm;
503 	struct damon_region *r;
504 	unsigned int max_nr_accesses = 0;
505 	bool same_target;
506 
507 	damon_for_each_target(t, ctx) {
508 		mm = damon_get_mm(t);
509 		same_target = false;
510 		damon_for_each_region(r, t) {
511 			__damon_va_check_access(mm, r, same_target,
512 					&ctx->attrs);
513 			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
514 			same_target = true;
515 		}
516 		if (mm)
517 			mmput(mm);
518 	}
519 
520 	return max_nr_accesses;
521 }
522 
523 static bool damos_va_filter_young_match(struct damos_filter *filter,
524 		struct folio *folio, struct vm_area_struct *vma,
525 		unsigned long addr, pte_t *ptep, pmd_t *pmdp)
526 {
527 	bool young = false;
528 
529 	if (ptep)
530 		young = pte_young(ptep_get(ptep));
531 	else if (pmdp)
532 		young = pmd_young(pmdp_get(pmdp));
533 
534 	young = young || !folio_test_idle(folio) ||
535 		mmu_notifier_test_young(vma->vm_mm, addr);
536 
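	/*
	 * Clear the accessed bit after reading it, so that a following check
	 * of this filter observes only accesses made after this one.
	 */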
537 	if (young && ptep)
538 		damon_ptep_mkold(ptep, vma, addr);
539 	else if (young && pmdp)
540 		damon_pmdp_mkold(pmdp, vma, addr);
541 
542 	return young == filter->matching;
543 }
544 
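/*
 * Return true if the folio should be filtered out (skipped) according to the
 * operations layer filters of the scheme, false otherwise.
 */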
545 static bool damos_va_filter_out(struct damos *scheme, struct folio *folio,
546 		struct vm_area_struct *vma, unsigned long addr,
547 		pte_t *ptep, pmd_t *pmdp)
548 {
549 	struct damos_filter *filter;
550 	bool matched;
551 
552 	if (scheme->core_filters_allowed)
553 		return false;
554 
555 	damos_for_each_ops_filter(filter, scheme) {
556 		/*
557 		 * damos_folio_filter_match() checks the young filter by doing an
558 		 * rmap walk on the folio to find its page table entries.  However,
559 		 * since this is the vaddr scheme, we have direct access to the page
560 		 * tables, so use them instead.
561 		 */
562 		if (filter->type == DAMOS_FILTER_TYPE_YOUNG)
563 			matched = damos_va_filter_young_match(filter, folio,
564 				vma, addr, ptep, pmdp);
565 		else
566 			matched = damos_folio_filter_match(filter, folio);
567 
568 		if (matched)
569 			return !filter->allow;
570 	}
571 	return scheme->ops_filters_default_reject;
572 }
573 
574 struct damos_va_migrate_private {
575 	struct list_head *migration_lists;
576 	struct damos *scheme;
577 };
578 
579 /*
580  * Place the given folio in the migration_list corresponding to where the folio
581  * should be migrated.
582  *
583  * The algorithm used here is similar to weighted_interleave_nid()
584  */
585 static void damos_va_migrate_dests_add(struct folio *folio,
586 		struct vm_area_struct *vma, unsigned long addr,
587 		struct damos_migrate_dests *dests,
588 		struct list_head *migration_lists)
589 {
590 	pgoff_t ilx;
591 	int order;
592 	unsigned int target;
593 	unsigned int weight_total = 0;
594 	int i;
595 
596 	/*
597 	 * If dests is empty, there is only one migration list corresponding
598 	 * to s->target_nid.
599 	 */
600 	if (!dests->nr_dests) {
601 		i = 0;
602 		goto isolate;
603 	}
604 
605 	order = folio_order(folio);
606 	ilx = vma->vm_pgoff >> order;
607 	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);
608 
609 	for (i = 0; i < dests->nr_dests; i++)
610 		weight_total += dests->weight_arr[i];
611 
612 	/* If the total weight is somehow 0, don't migrate at all */
613 	if (!weight_total)
614 		return;
615 
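	/*
	 * Pick the destination by treating the weights as consecutive slots
	 * and selecting the one whose slot range contains the interleave
	 * index modulo the total weight.  For example, with weights {3, 1},
	 * indexes 0-2 select the first destination and index 3 the second.
	 */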
616 	target = ilx % weight_total;
617 	for (i = 0; i < dests->nr_dests; i++) {
618 		if (target < dests->weight_arr[i])
619 			break;
620 		target -= dests->weight_arr[i];
621 	}
622 
623 	/* If the folio is already in the right node, don't do anything */
624 	if (folio_nid(folio) == dests->node_id_arr[i])
625 		return;
626 
627 isolate:
628 	if (!folio_isolate_lru(folio))
629 		return;
630 
631 	list_add(&folio->lru, &migration_lists[i]);
632 }
633 
634 static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr,
635 		unsigned long next, struct mm_walk *walk)
636 {
637 	struct damos_va_migrate_private *priv = walk->private;
638 	struct list_head *migration_lists = priv->migration_lists;
639 	struct damos *s = priv->scheme;
640 	struct damos_migrate_dests *dests = &s->migrate_dests;
641 	struct folio *folio;
642 	spinlock_t *ptl;
643 	pte_t *start_pte, *pte, ptent;
644 	int nr;
645 
646 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
647 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
648 	if (ptl) {
649 		pmd_t pmde = pmdp_get(pmd);
650 
651 		if (!pmd_present(pmde))
652 			goto huge_out;
653 		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
654 		if (!folio)
655 			goto huge_out;
656 		if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd))
657 			goto huge_out;
658 		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
659 				migration_lists);
660 huge_out:
661 		spin_unlock(ptl);
662 		return 0;
663 	}
664 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
665 
666 	start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
667 	if (!pte)
668 		return 0;
669 
670 	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
671 		nr = 1;
672 		ptent = ptep_get(pte);
673 
674 		if (pte_none(ptent) || !pte_present(ptent))
675 			continue;
676 		folio = vm_normal_folio(walk->vma, addr, ptent);
677 		if (!folio)
678 			continue;
679 		if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL))
680 			continue;
681 		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
682 				migration_lists);
683 		nr = folio_nr_pages(folio);
684 	}
685 	pte_unmap_unlock(start_pte, ptl);
686 	return 0;
687 }
688 
689 /*
690  * Functions for the target validity check and cleanup
691  */
692 
693 static bool damon_va_target_valid(struct damon_target *t)
694 {
695 	struct task_struct *task;
696 
697 	task = damon_get_task_struct(t);
698 	if (task) {
699 		put_task_struct(task);
700 		return true;
701 	}
702 
703 	return false;
704 }
705 
706 static void damon_va_cleanup_target(struct damon_target *t)
707 {
708 	put_pid(t->pid);
709 }
710 
711 #ifndef CONFIG_ADVISE_SYSCALLS
712 static unsigned long damos_madvise(struct damon_target *target,
713 		struct damon_region *r, int behavior)
714 {
715 	return 0;
716 }
717 #else
718 static unsigned long damos_madvise(struct damon_target *target,
719 		struct damon_region *r, int behavior)
720 {
721 	struct mm_struct *mm;
722 	unsigned long start = PAGE_ALIGN(r->ar.start);
723 	unsigned long len = PAGE_ALIGN(damon_sz_region(r));
724 	unsigned long applied;
725 
726 	mm = damon_get_mm(target);
727 	if (!mm)
728 		return 0;
729 
730 	applied = do_madvise(mm, start, len, behavior) ? 0 : len;
731 	mmput(mm);
732 
733 	return applied;
734 }
735 #endif	/* CONFIG_ADVISE_SYSCALLS */
736 
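/*
 * Migrate the folios of the given region to the migration destinations of the
 * scheme.  Folios are first gathered into one list per destination node, then
 * each list is moved with damon_migrate_pages().  Returns the total size of
 * the migrated memory in bytes.
 */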
737 static unsigned long damos_va_migrate(struct damon_target *target,
738 		struct damon_region *r, struct damos *s,
739 		unsigned long *sz_filter_passed)
740 {
741 	LIST_HEAD(folio_list);
742 	struct damos_va_migrate_private priv;
743 	struct mm_struct *mm;
744 	int nr_dests;
745 	int nid;
746 	bool use_target_nid;
747 	unsigned long applied = 0;
748 	struct damos_migrate_dests *dests = &s->migrate_dests;
749 	struct mm_walk_ops walk_ops = {
750 		.pmd_entry = damos_va_migrate_pmd_entry,
751 		.pte_entry = NULL,
752 		.walk_lock = PGWALK_RDLOCK,
753 	};
754 
755 	use_target_nid = dests->nr_dests == 0;
756 	nr_dests = use_target_nid ? 1 : dests->nr_dests;
757 	priv.scheme = s;
758 	priv.migration_lists = kmalloc_array(nr_dests, sizeof(*priv.migration_lists), GFP_KERNEL);
759 	if (!priv.migration_lists)
760 		return 0;
761 
762 	for (int i = 0; i < nr_dests; i++)
763 		INIT_LIST_HEAD(&priv.migration_lists[i]);
764 
765 
766 	mm = damon_get_mm(target);
767 	if (!mm)
768 		goto free_lists;
769 
770 	mmap_read_lock(mm);
771 	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
772 	mmap_read_unlock(mm);
773 	mmput(mm);
774 
775 	for (int i = 0; i < nr_dests; i++) {
776 		nid = use_target_nid ? s->target_nid : dests->node_id_arr[i];
777 		applied += damon_migrate_pages(&priv.migration_lists[i], nid);
778 		cond_resched();
779 	}
780 
781 free_lists:
782 	kfree(priv.migration_lists);
783 	return applied * PAGE_SIZE;
784 }
785 
786 struct damos_va_stat_private {
787 	struct damos *scheme;
788 	unsigned long *sz_filter_passed;
789 };
790 
791 static inline bool damos_va_invalid_folio(struct folio *folio,
792 		struct damos *s)
793 {
794 	return !folio || folio == s->last_applied;
795 }
796 
797 static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
798 		unsigned long next, struct mm_walk *walk)
799 {
800 	struct damos_va_stat_private *priv = walk->private;
801 	struct damos *s = priv->scheme;
802 	unsigned long *sz_filter_passed = priv->sz_filter_passed;
803 	struct vm_area_struct *vma = walk->vma;
804 	struct folio *folio;
805 	spinlock_t *ptl;
806 	pte_t *start_pte, *pte, ptent;
807 	int nr;
808 
809 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
810 	ptl = pmd_trans_huge_lock(pmd, vma);
811 	if (ptl) {
812 		pmd_t pmde = pmdp_get(pmd);
813 
814 		if (!pmd_present(pmde))
815 			goto huge_unlock;
816 
817 		folio = vm_normal_folio_pmd(vma, addr, pmde);
818 
819 		if (damos_va_invalid_folio(folio, s))
820 			goto huge_unlock;
821 
822 		if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd))
823 			*sz_filter_passed += folio_size(folio);
824 		s->last_applied = folio;
825 
826 huge_unlock:
827 		spin_unlock(ptl);
828 		return 0;
829 	}
830 #endif
831 	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
832 	if (!start_pte)
833 		return 0;
834 
835 	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
836 		nr = 1;
837 		ptent = ptep_get(pte);
838 
839 		if (pte_none(ptent) || !pte_present(ptent))
840 			continue;
841 
842 		folio = vm_normal_folio(vma, addr, ptent);
843 
844 		if (damos_va_invalid_folio(folio, s))
845 			continue;
846 
847 		if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL))
848 			*sz_filter_passed += folio_size(folio);
849 		nr = folio_nr_pages(folio);
850 		s->last_applied = folio;
851 	}
852 	pte_unmap_unlock(start_pte, ptl);
853 	return 0;
854 }
855 
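/*
 * Apply no action, but account the size of the memory in the region that
 * passes the operations layer filters of the scheme via 'sz_filter_passed'.
 */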
856 static unsigned long damos_va_stat(struct damon_target *target,
857 		struct damon_region *r, struct damos *s,
858 		unsigned long *sz_filter_passed)
859 {
860 	struct damos_va_stat_private priv;
861 	struct mm_struct *mm;
862 	struct mm_walk_ops walk_ops = {
863 		.pmd_entry = damos_va_stat_pmd_entry,
864 		.walk_lock = PGWALK_RDLOCK,
865 	};
866 
867 	priv.scheme = s;
868 	priv.sz_filter_passed = sz_filter_passed;
869 
870 	if (!damos_ops_has_filter(s))
871 		return 0;
872 
873 	mm = damon_get_mm(target);
874 	if (!mm)
875 		return 0;
876 
877 	mmap_read_lock(mm);
878 	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
879 	mmap_read_unlock(mm);
880 	mmput(mm);
881 	return 0;
882 }
883 
884 static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
885 		struct damon_target *t, struct damon_region *r,
886 		struct damos *scheme, unsigned long *sz_filter_passed)
887 {
888 	int madv_action;
889 
890 	switch (scheme->action) {
891 	case DAMOS_WILLNEED:
892 		madv_action = MADV_WILLNEED;
893 		break;
894 	case DAMOS_COLD:
895 		madv_action = MADV_COLD;
896 		break;
897 	case DAMOS_PAGEOUT:
898 		madv_action = MADV_PAGEOUT;
899 		break;
900 	case DAMOS_HUGEPAGE:
901 		madv_action = MADV_HUGEPAGE;
902 		break;
903 	case DAMOS_NOHUGEPAGE:
904 		madv_action = MADV_NOHUGEPAGE;
905 		break;
906 	case DAMOS_MIGRATE_HOT:
907 	case DAMOS_MIGRATE_COLD:
908 		return damos_va_migrate(t, r, scheme, sz_filter_passed);
909 	case DAMOS_STAT:
910 		return damos_va_stat(t, r, scheme, sz_filter_passed);
911 	default:
912 		/*
913 		 * DAMOS actions that are not yet supported by 'vaddr'.
914 		 */
915 		return 0;
916 	}
917 
918 	return damos_madvise(t, r, madv_action);
919 }
920 
921 static int damon_va_scheme_score(struct damon_ctx *context,
922 		struct damon_region *r, struct damos *scheme)
923 {
924 
925 	switch (scheme->action) {
926 	case DAMOS_PAGEOUT:
927 		return damon_cold_score(context, r, scheme);
928 	case DAMOS_MIGRATE_HOT:
929 		return damon_hot_score(context, r, scheme);
930 	case DAMOS_MIGRATE_COLD:
931 		return damon_cold_score(context, r, scheme);
932 	default:
933 		break;
934 	}
935 
936 	return DAMOS_MAX_SCORE;
937 }
938 
939 static int __init damon_va_initcall(void)
940 {
941 	struct damon_operations ops = {
942 		.id = DAMON_OPS_VADDR,
943 		.init = damon_va_init,
944 		.update = damon_va_update,
945 		.prepare_access_checks = damon_va_prepare_access_checks,
946 		.check_accesses = damon_va_check_accesses,
947 		.target_valid = damon_va_target_valid,
948 		.cleanup_target = damon_va_cleanup_target,
949 		.apply_scheme = damon_va_apply_scheme,
950 		.get_scheme_score = damon_va_scheme_score,
951 	};
952 	/* ops for fixed virtual address ranges */
953 	struct damon_operations ops_fvaddr = ops;
954 	int err;
955 
956 	/* Don't automatically set the monitoring target regions to cover the entire mapping */
957 	ops_fvaddr.id = DAMON_OPS_FVADDR;
958 	ops_fvaddr.init = NULL;
959 	ops_fvaddr.update = NULL;
960 
961 	err = damon_register_ops(&ops);
962 	if (err)
963 		return err;
964 	return damon_register_ops(&ops_fvaddr);
965 }
966 
967 subsys_initcall(damon_va_initcall);
968 
969 #include "tests/vaddr-kunit.h"
970