xref: /linux/mm/damon/vaddr.c (revision f4e98954234b104c23902ee5bb4e59be6f9904a7)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * DAMON Code for Virtual Address Spaces
4  *
5  * Author: SeongJae Park <sj@kernel.org>
6  */
7 
8 #define pr_fmt(fmt) "damon-va: " fmt
9 
10 #include <linux/highmem.h>
11 #include <linux/hugetlb.h>
12 #include <linux/mman.h>
13 #include <linux/mmu_notifier.h>
14 #include <linux/page_idle.h>
15 #include <linux/pagewalk.h>
16 #include <linux/sched/mm.h>
17 
18 #include "../internal.h"
19 #include "ops-common.h"
20 
21 #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
22 #undef DAMON_MIN_REGION_SZ
23 #define DAMON_MIN_REGION_SZ 1
24 #endif
25 
26 /*
27  * 't->pid' should be the pointer to the relevant 'struct pid' having reference
28  * count.  Caller must put the returned task, unless it is NULL.
29  */
30 static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
31 {
32 	return get_pid_task(t->pid, PIDTYPE_PID);
33 }
34 
35 /*
36  * Get the mm_struct of the given target
37  *
38  * Caller _must_ put the mm_struct after use, unless it is NULL.
39  *
40  * Returns the mm_struct of the target on success, NULL on failure
41  */
42 static struct mm_struct *damon_get_mm(struct damon_target *t)
43 {
44 	struct task_struct *task;
45 	struct mm_struct *mm;
46 
47 	task = damon_get_task_struct(t);
48 	if (!task)
49 		return NULL;
50 
51 	mm = get_task_mm(task);
52 	put_task_struct(task);
53 	return mm;
54 }
55 
56 static unsigned long sz_range(struct damon_addr_range *r)
57 {
58 	return r->end - r->start;
59 }
60 
61 /*
62  * Find three regions separated by two biggest unmapped regions
63  *
64  * vma		the head vma of the target address space
65  * regions	an array of three address ranges that results will be saved
66  *
67  * This function receives an address space and finds three regions in it which
68  * separated by the two biggest unmapped regions in the space.  Please refer to
69  * below comments of '__damon_va_init_regions()' function to know why this is
70  * necessary.
71  *
72  * Returns 0 if success, or negative error code otherwise.
73  */
74 static int __damon_va_three_regions(struct mm_struct *mm,
75 				       struct damon_addr_range regions[3])
76 {
77 	struct damon_addr_range first_gap = {0}, second_gap = {0};
78 	VMA_ITERATOR(vmi, mm, 0);
79 	struct vm_area_struct *vma, *prev = NULL;
80 	unsigned long start;
81 
82 	/*
83 	 * Find the two biggest gaps so that first_gap > second_gap > others.
84 	 * If this is too slow, it can be optimised to examine the maple
85 	 * tree gaps.
86 	 */
87 	rcu_read_lock();
88 	for_each_vma(vmi, vma) {
89 		unsigned long gap;
90 
91 		if (!prev) {
92 			start = vma->vm_start;
93 			goto next;
94 		}
95 		gap = vma->vm_start - prev->vm_end;
96 
97 		if (gap > sz_range(&first_gap)) {
98 			second_gap = first_gap;
99 			first_gap.start = prev->vm_end;
100 			first_gap.end = vma->vm_start;
101 		} else if (gap > sz_range(&second_gap)) {
102 			second_gap.start = prev->vm_end;
103 			second_gap.end = vma->vm_start;
104 		}
105 next:
106 		prev = vma;
107 	}
108 	rcu_read_unlock();
109 
110 	if (!sz_range(&second_gap) || !sz_range(&first_gap))
111 		return -EINVAL;
112 
113 	/* Sort the two biggest gaps by address */
114 	if (first_gap.start > second_gap.start)
115 		swap(first_gap, second_gap);
116 
117 	/* Store the result */
118 	regions[0].start = ALIGN(start, DAMON_MIN_REGION_SZ);
119 	regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION_SZ);
120 	regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION_SZ);
121 	regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION_SZ);
122 	regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION_SZ);
123 	regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION_SZ);
124 
125 	return 0;
126 }
127 
128 /*
129  * Get the three regions in the given target (task)
130  *
131  * Returns 0 on success, negative error code otherwise.
132  */
133 static int damon_va_three_regions(struct damon_target *t,
134 				struct damon_addr_range regions[3])
135 {
136 	struct mm_struct *mm;
137 	int rc;
138 
139 	mm = damon_get_mm(t);
140 	if (!mm)
141 		return -EINVAL;
142 
143 	mmap_read_lock(mm);
144 	rc = __damon_va_three_regions(mm, regions);
145 	mmap_read_unlock(mm);
146 
147 	mmput(mm);
148 	return rc;
149 }
150 
151 /*
152  * Initialize the monitoring target regions for the given target (task)
153  *
154  * t	the given target
155  *
156  * Because only a number of small portions of the entire address space
157  * is actually mapped to the memory and accessed, monitoring the unmapped
158  * regions is wasteful.  That said, because we can deal with small noises,
159  * tracking every mapping is not strictly required but could even incur a high
160  * overhead if the mapping frequently changes or the number of mappings is
161  * high.  The adaptive regions adjustment mechanism will further help to deal
162  * with the noise by simply identifying the unmapped areas as a region that
163  * has no access.  Moreover, applying the real mappings that would have many
164  * unmapped areas inside will make the adaptive mechanism quite complex.  That
165  * said, too huge unmapped areas inside the monitoring target should be removed
166  * to not take the time for the adaptive mechanism.
167  *
168  * For the reason, we convert the complex mappings to three distinct regions
169  * that cover every mapped area of the address space.  Also the two gaps
170  * between the three regions are the two biggest unmapped areas in the given
171  * address space.  In detail, this function first identifies the start and the
172  * end of the mappings and the two biggest unmapped areas of the address space.
173  * Then, it constructs the three regions as below:
174  *
175  *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
176  *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
177  *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
178  *
 * Because the usual memory map of a process looks like below, the gap between
 * the heap and the uppermost mmap()-ed region, and the gap between the
 * lowermost mmap()-ed region and the stack, will be the two biggest unmapped
 * regions.  Because these gaps are exceptionally huge areas in a usual address
 * space, excluding these two biggest unmapped regions will be sufficient to
 * make a trade-off.
184  *
185  *   <heap>
186  *   <BIG UNMAPPED REGION 1>
187  *   <uppermost mmap()-ed region>
188  *   (other mmap()-ed regions and small unmapped regions)
189  *   <lowermost mmap()-ed region>
190  *   <BIG UNMAPPED REGION 2>
191  *   <stack>
192  */
193 static void __damon_va_init_regions(struct damon_ctx *ctx,
194 				     struct damon_target *t)
195 {
196 	struct damon_target *ti;
197 	struct damon_addr_range regions[3];
198 	int tidx = 0;
199 
200 	if (damon_va_three_regions(t, regions)) {
201 		damon_for_each_target(ti, ctx) {
202 			if (ti == t)
203 				break;
204 			tidx++;
205 		}
206 		pr_debug("Failed to get three regions of %dth target\n", tidx);
207 		return;
208 	}
209 
210 	damon_set_regions(t, regions, 3, DAMON_MIN_REGION_SZ);
211 }
212 
213 /* Initialize '->regions_list' of every target (task) */
214 static void damon_va_init(struct damon_ctx *ctx)
215 {
216 	struct damon_target *t;
217 
218 	damon_for_each_target(t, ctx) {
219 		/* the user may set the target regions as they want */
220 		if (!damon_nr_regions(t))
221 			__damon_va_init_regions(ctx, t);
222 	}
223 }
224 
225 /*
226  * Update regions for current memory mappings
227  */
228 static void damon_va_update(struct damon_ctx *ctx)
229 {
230 	struct damon_addr_range three_regions[3];
231 	struct damon_target *t;
232 
233 	damon_for_each_target(t, ctx) {
234 		if (damon_va_three_regions(t, three_regions))
235 			continue;
236 		damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ);
237 	}
238 }
239 
240 static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
241 		unsigned long next, struct mm_walk *walk)
242 {
243 	pte_t *pte;
244 	spinlock_t *ptl;
245 
246 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
247 	if (ptl) {
248 		pmd_t pmde = pmdp_get(pmd);
249 
250 		if (pmd_present(pmde))
251 			damon_pmdp_mkold(pmd, walk->vma, addr);
252 		spin_unlock(ptl);
253 		return 0;
254 	}
255 
256 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
257 	if (!pte)
258 		return 0;
259 	if (!pte_present(ptep_get(pte)))
260 		goto out;
261 	damon_ptep_mkold(pte, walk->vma, addr);
262 out:
263 	pte_unmap_unlock(pte, ptl);
264 	return 0;
265 }
266 
267 #ifdef CONFIG_HUGETLB_PAGE
268 static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
269 				struct vm_area_struct *vma, unsigned long addr)
270 {
271 	bool referenced = false;
272 	pte_t entry = huge_ptep_get(mm, addr, pte);
273 	struct folio *folio = pfn_folio(pte_pfn(entry));
274 	unsigned long psize = huge_page_size(hstate_vma(vma));
275 
276 	folio_get(folio);
277 
278 	if (pte_young(entry)) {
279 		referenced = true;
280 		entry = pte_mkold(entry);
281 		set_huge_pte_at(mm, addr, pte, entry, psize);
282 	}
283 
284 	if (mmu_notifier_clear_young(mm, addr,
285 				     addr + huge_page_size(hstate_vma(vma))))
286 		referenced = true;
287 
288 	if (referenced)
289 		folio_set_young(folio);
290 
291 	folio_set_idle(folio);
292 	folio_put(folio);
293 }
294 
295 static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
296 				     unsigned long addr, unsigned long end,
297 				     struct mm_walk *walk)
298 {
299 	struct hstate *h = hstate_vma(walk->vma);
300 	spinlock_t *ptl;
301 	pte_t entry;
302 
303 	ptl = huge_pte_lock(h, walk->mm, pte);
304 	entry = huge_ptep_get(walk->mm, addr, pte);
305 	if (!pte_present(entry))
306 		goto out;
307 
308 	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
309 
310 out:
311 	spin_unlock(ptl);
312 	return 0;
313 }
314 #else
315 #define damon_mkold_hugetlb_entry NULL
316 #endif /* CONFIG_HUGETLB_PAGE */
317 
318 static const struct mm_walk_ops damon_mkold_ops = {
319 	.pmd_entry = damon_mkold_pmd_entry,
320 	.hugetlb_entry = damon_mkold_hugetlb_entry,
321 	.walk_lock = PGWALK_RDLOCK,
322 };
323 
324 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
325 {
326 	mmap_read_lock(mm);
327 	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
328 	mmap_read_unlock(mm);
329 }
330 
331 /*
332  * Functions for the access checking of the regions
333  */
334 
335 static void __damon_va_prepare_access_check(struct mm_struct *mm,
336 					struct damon_region *r,
337 					struct damon_ctx *ctx)
338 {
339 	r->sampling_addr = damon_rand(ctx, r->ar.start, r->ar.end);
340 
341 	damon_va_mkold(mm, r->sampling_addr);
342 }
343 
344 static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
345 {
346 	struct damon_target *t;
347 	struct mm_struct *mm;
348 	struct damon_region *r;
349 
350 	damon_for_each_target(t, ctx) {
351 		mm = damon_get_mm(t);
352 		if (!mm)
353 			continue;
354 		damon_for_each_region(r, t)
355 			__damon_va_prepare_access_check(mm, r, ctx);
356 		mmput(mm);
357 	}
358 }
359 
360 struct damon_young_walk_private {
361 	/* size of the folio for the access checked virtual memory address */
362 	unsigned long *folio_sz;
363 	bool young;
364 };
365 
366 static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
367 		unsigned long next, struct mm_walk *walk)
368 {
369 	pte_t *pte;
370 	pte_t ptent;
371 	spinlock_t *ptl;
372 	struct folio *folio;
373 	struct damon_young_walk_private *priv = walk->private;
374 
375 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
376 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
377 	if (ptl) {
378 		pmd_t pmde = pmdp_get(pmd);
379 
380 		if (!pmd_present(pmde))
381 			goto huge_out;
382 		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
383 		if (!folio)
384 			goto huge_out;
385 		if (pmd_young(pmde) || !folio_test_idle(folio) ||
386 					mmu_notifier_test_young(walk->mm,
387 						addr))
388 			priv->young = true;
389 		*priv->folio_sz = HPAGE_PMD_SIZE;
390 huge_out:
391 		spin_unlock(ptl);
392 		return 0;
393 	}
394 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
395 
396 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
397 	if (!pte)
398 		return 0;
399 	ptent = ptep_get(pte);
400 	if (!pte_present(ptent))
401 		goto out;
402 	folio = vm_normal_folio(walk->vma, addr, ptent);
403 	if (!folio)
404 		goto out;
405 	if (pte_young(ptent) || !folio_test_idle(folio) ||
406 			mmu_notifier_test_young(walk->mm, addr))
407 		priv->young = true;
408 	*priv->folio_sz = folio_size(folio);
409 out:
410 	pte_unmap_unlock(pte, ptl);
411 	return 0;
412 }
413 
414 #ifdef CONFIG_HUGETLB_PAGE
415 static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
416 				     unsigned long addr, unsigned long end,
417 				     struct mm_walk *walk)
418 {
419 	struct damon_young_walk_private *priv = walk->private;
420 	struct hstate *h = hstate_vma(walk->vma);
421 	struct folio *folio;
422 	spinlock_t *ptl;
423 	pte_t entry;
424 
425 	ptl = huge_pte_lock(h, walk->mm, pte);
426 	entry = huge_ptep_get(walk->mm, addr, pte);
427 	if (!pte_present(entry))
428 		goto out;
429 
430 	folio = pfn_folio(pte_pfn(entry));
431 	folio_get(folio);
432 
433 	if (pte_young(entry) || !folio_test_idle(folio) ||
434 	    mmu_notifier_test_young(walk->mm, addr))
435 		priv->young = true;
436 	*priv->folio_sz = huge_page_size(h);
437 
438 	folio_put(folio);
439 
440 out:
441 	spin_unlock(ptl);
442 	return 0;
443 }
444 #else
445 #define damon_young_hugetlb_entry NULL
446 #endif /* CONFIG_HUGETLB_PAGE */
447 
448 static const struct mm_walk_ops damon_young_ops = {
449 	.pmd_entry = damon_young_pmd_entry,
450 	.hugetlb_entry = damon_young_hugetlb_entry,
451 	.walk_lock = PGWALK_RDLOCK,
452 };
453 
454 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
455 		unsigned long *folio_sz)
456 {
457 	struct damon_young_walk_private arg = {
458 		.folio_sz = folio_sz,
459 		.young = false,
460 	};
461 
462 	mmap_read_lock(mm);
463 	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
464 	mmap_read_unlock(mm);
465 	return arg.young;
466 }
467 
468 /*
469  * Check whether the region was accessed after the last preparation
470  *
471  * mm	'mm_struct' for the given virtual address space
472  * r	the region to be checked
473  */
474 static void __damon_va_check_access(struct mm_struct *mm,
475 				struct damon_region *r, bool same_target,
476 				struct damon_attrs *attrs)
477 {
478 	static unsigned long last_addr;
479 	static unsigned long last_folio_sz = PAGE_SIZE;
480 	static bool last_accessed;
481 
482 	if (!mm) {
483 		damon_update_region_access_rate(r, false, attrs);
484 		return;
485 	}
486 
487 	/* If the region is in the last checked page, reuse the result */
488 	if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
489 				ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
490 		damon_update_region_access_rate(r, last_accessed, attrs);
491 		return;
492 	}
493 
494 	last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
495 	damon_update_region_access_rate(r, last_accessed, attrs);
496 
497 	last_addr = r->sampling_addr;
498 }
499 
500 static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
501 {
502 	struct damon_target *t;
503 	struct mm_struct *mm;
504 	struct damon_region *r;
505 	unsigned int max_nr_accesses = 0;
506 	bool same_target;
507 
508 	damon_for_each_target(t, ctx) {
509 		mm = damon_get_mm(t);
510 		same_target = false;
511 		damon_for_each_region(r, t) {
512 			__damon_va_check_access(mm, r, same_target,
513 					&ctx->attrs);
514 			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
515 			same_target = true;
516 		}
517 		if (mm)
518 			mmput(mm);
519 	}
520 
521 	return max_nr_accesses;
522 }
523 
524 static bool damos_va_filter_young_match(struct damos_filter *filter,
525 		struct folio *folio, struct vm_area_struct *vma,
526 		unsigned long addr, pte_t *ptep, pmd_t *pmdp)
527 {
528 	bool young = false;
529 
530 	if (ptep)
531 		young = pte_young(ptep_get(ptep));
532 	else if (pmdp)
533 		young = pmd_young(pmdp_get(pmdp));
534 
535 	young = young || !folio_test_idle(folio) ||
536 		mmu_notifier_test_young(vma->vm_mm, addr);
537 
538 	if (young && ptep)
539 		damon_ptep_mkold(ptep, vma, addr);
540 	else if (young && pmdp)
541 		damon_pmdp_mkold(pmdp, vma, addr);
542 
543 	return young == filter->matching;
544 }
545 
546 static bool damos_va_filter_out(struct damos *scheme, struct folio *folio,
547 		struct vm_area_struct *vma, unsigned long addr,
548 		pte_t *ptep, pmd_t *pmdp)
549 {
550 	struct damos_filter *filter;
551 	bool matched;
552 
553 	if (scheme->core_filters_allowed)
554 		return false;
555 
556 	damos_for_each_ops_filter(filter, scheme) {
557 		/*
558 		 * damos_folio_filter_match checks the young filter by doing an
559 		 * rmap on the folio to find its page table. However, being the
560 		 * vaddr scheme, we have direct access to the page tables, so
561 		 * use that instead.
562 		 */
563 		if (filter->type == DAMOS_FILTER_TYPE_YOUNG)
564 			matched = damos_va_filter_young_match(filter, folio,
565 				vma, addr, ptep, pmdp);
566 		else
567 			matched = damos_folio_filter_match(filter, folio);
568 
569 		if (matched)
570 			return !filter->allow;
571 	}
572 	return scheme->ops_filters_default_reject;
573 }
574 
/* Private data of the page walk that damos_va_migrate() does */
struct damos_va_migrate_private {
	/* array of lists that collect the folios to migrate, one per dest */
	struct list_head *migration_lists;
	/* the scheme that the migration is done for */
	struct damos *scheme;
};
579 
580 /*
581  * Place the given folio in the migration_list corresponding to where the folio
582  * should be migrated.
583  *
584  * The algorithm used here is similar to weighted_interleave_nid()
585  */
586 static void damos_va_migrate_dests_add(struct folio *folio,
587 		struct vm_area_struct *vma, unsigned long addr,
588 		struct damos_migrate_dests *dests,
589 		struct list_head *migration_lists)
590 {
591 	pgoff_t ilx;
592 	int order;
593 	unsigned int target;
594 	unsigned int weight_total = 0;
595 	int i;
596 
597 	/*
598 	 * If dests is empty, there is only one migration list corresponding
599 	 * to s->target_nid.
600 	 */
601 	if (!dests->nr_dests) {
602 		i = 0;
603 		goto isolate;
604 	}
605 
606 	order = folio_order(folio);
607 	ilx = vma->vm_pgoff >> order;
608 	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);
609 
610 	for (i = 0; i < dests->nr_dests; i++)
611 		weight_total += dests->weight_arr[i];
612 
613 	/* If the total weights are somehow 0, don't migrate at all */
614 	if (!weight_total)
615 		return;
616 
617 	target = ilx % weight_total;
618 	for (i = 0; i < dests->nr_dests; i++) {
619 		if (target < dests->weight_arr[i])
620 			break;
621 		target -= dests->weight_arr[i];
622 	}
623 
624 	/* If the folio is already in the right node, don't do anything */
625 	if (folio_nid(folio) == dests->node_id_arr[i])
626 		return;
627 
628 isolate:
629 	if (!folio_isolate_lru(folio))
630 		return;
631 
632 	list_add(&folio->lru, &migration_lists[i]);
633 }
634 
635 static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr,
636 		unsigned long next, struct mm_walk *walk)
637 {
638 	struct damos_va_migrate_private *priv = walk->private;
639 	struct list_head *migration_lists = priv->migration_lists;
640 	struct damos *s = priv->scheme;
641 	struct damos_migrate_dests *dests = &s->migrate_dests;
642 	struct folio *folio;
643 	spinlock_t *ptl;
644 	pte_t *start_pte, *pte, ptent;
645 	int nr;
646 
647 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
648 	ptl = pmd_trans_huge_lock(pmd, walk->vma);
649 	if (ptl) {
650 		pmd_t pmde = pmdp_get(pmd);
651 
652 		if (!pmd_present(pmde))
653 			goto huge_out;
654 		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
655 		if (!folio)
656 			goto huge_out;
657 		if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd))
658 			goto huge_out;
659 		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
660 				migration_lists);
661 huge_out:
662 		spin_unlock(ptl);
663 		return 0;
664 	}
665 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
666 
667 	start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
668 	if (!pte)
669 		return 0;
670 
671 	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
672 		nr = 1;
673 		ptent = ptep_get(pte);
674 
675 		if (pte_none(ptent) || !pte_present(ptent))
676 			continue;
677 		folio = vm_normal_folio(walk->vma, addr, ptent);
678 		if (!folio)
679 			continue;
680 		if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL))
681 			continue;
682 		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
683 				migration_lists);
684 		nr = folio_nr_pages(folio);
685 	}
686 	pte_unmap_unlock(start_pte, ptl);
687 	return 0;
688 }
689 
690 /*
691  * Functions for the target validity check and cleanup
692  */
693 
694 static bool damon_va_target_valid(struct damon_target *t)
695 {
696 	struct task_struct *task;
697 
698 	task = damon_get_task_struct(t);
699 	if (task) {
700 		put_task_struct(task);
701 		return true;
702 	}
703 
704 	return false;
705 }
706 
707 static void damon_va_cleanup_target(struct damon_target *t)
708 {
709 	put_pid(t->pid);
710 }
711 
#ifdef CONFIG_ADVISE_SYSCALLS
/*
 * Apply the madvise() 'behavior' to the given region of the target
 *
 * The start address and the length of the region are page-aligned before
 * being passed to do_madvise().
 *
 * Returns the page-aligned length of the region if do_madvise() succeeded,
 * or 0 if it failed or the mm_struct of the target cannot be obtained.
 */
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	unsigned long start = PAGE_ALIGN(r->ar.start);
	unsigned long len = PAGE_ALIGN(damon_sz_region(r));
	unsigned long applied = 0;
	struct mm_struct *mm;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	if (!do_madvise(mm, start, len, behavior))
		applied = len;
	mmput(mm);

	return applied;
}
#else
/* Without CONFIG_ADVISE_SYSCALLS, nothing can be applied */
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	return 0;
}
#endif	/* CONFIG_ADVISE_SYSCALLS */
737 
738 static unsigned long damos_va_migrate(struct damon_target *target,
739 		struct damon_region *r, struct damos *s,
740 		unsigned long *sz_filter_passed)
741 {
742 	LIST_HEAD(folio_list);
743 	struct damos_va_migrate_private priv;
744 	struct mm_struct *mm;
745 	int nr_dests;
746 	int nid;
747 	bool use_target_nid;
748 	unsigned long applied = 0;
749 	struct damos_migrate_dests *dests = &s->migrate_dests;
750 	struct mm_walk_ops walk_ops = {
751 		.pmd_entry = damos_va_migrate_pmd_entry,
752 		.pte_entry = NULL,
753 		.walk_lock = PGWALK_RDLOCK,
754 	};
755 
756 	use_target_nid = dests->nr_dests == 0;
757 	nr_dests = use_target_nid ? 1 : dests->nr_dests;
758 	priv.scheme = s;
759 	priv.migration_lists = kmalloc_objs(*priv.migration_lists, nr_dests);
760 	if (!priv.migration_lists)
761 		return 0;
762 
763 	for (int i = 0; i < nr_dests; i++)
764 		INIT_LIST_HEAD(&priv.migration_lists[i]);
765 
766 
767 	mm = damon_get_mm(target);
768 	if (!mm)
769 		goto free_lists;
770 
771 	mmap_read_lock(mm);
772 	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
773 	mmap_read_unlock(mm);
774 	mmput(mm);
775 
776 	for (int i = 0; i < nr_dests; i++) {
777 		nid = use_target_nid ? s->target_nid : dests->node_id_arr[i];
778 		applied += damon_migrate_pages(&priv.migration_lists[i], nid);
779 		cond_resched();
780 	}
781 
782 free_lists:
783 	kfree(priv.migration_lists);
784 	return applied * PAGE_SIZE;
785 }
786 
/* Private data of the page walk that damos_va_stat() does */
struct damos_va_stat_private {
	/* the scheme whose filters are applied */
	struct damos *scheme;
	/* accumulates the size of the folios that passed the filters */
	unsigned long *sz_filter_passed;
};
791 
792 static inline bool damos_va_invalid_folio(struct folio *folio,
793 		struct damos *s)
794 {
795 	return !folio || folio == s->last_applied;
796 }
797 
798 static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
799 		unsigned long next, struct mm_walk *walk)
800 {
801 	struct damos_va_stat_private *priv = walk->private;
802 	struct damos *s = priv->scheme;
803 	unsigned long *sz_filter_passed = priv->sz_filter_passed;
804 	struct vm_area_struct *vma = walk->vma;
805 	struct folio *folio;
806 	spinlock_t *ptl;
807 	pte_t *start_pte, *pte, ptent;
808 	int nr;
809 
810 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
811 	ptl = pmd_trans_huge_lock(pmd, vma);
812 	if (ptl) {
813 		pmd_t pmde = pmdp_get(pmd);
814 
815 		if (!pmd_present(pmde))
816 			goto huge_unlock;
817 
818 		folio = vm_normal_folio_pmd(vma, addr, pmde);
819 
820 		if (damos_va_invalid_folio(folio, s))
821 			goto huge_unlock;
822 
823 		if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd))
824 			*sz_filter_passed += folio_size(folio);
825 		s->last_applied = folio;
826 
827 huge_unlock:
828 		spin_unlock(ptl);
829 		return 0;
830 	}
831 #endif
832 	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
833 	if (!start_pte)
834 		return 0;
835 
836 	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
837 		nr = 1;
838 		ptent = ptep_get(pte);
839 
840 		if (pte_none(ptent) || !pte_present(ptent))
841 			continue;
842 
843 		folio = vm_normal_folio(vma, addr, ptent);
844 
845 		if (damos_va_invalid_folio(folio, s))
846 			continue;
847 
848 		if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL))
849 			*sz_filter_passed += folio_size(folio);
850 		nr = folio_nr_pages(folio);
851 		s->last_applied = folio;
852 	}
853 	pte_unmap_unlock(start_pte, ptl);
854 	return 0;
855 }
856 
857 static unsigned long damos_va_stat(struct damon_target *target,
858 		struct damon_region *r, struct damos *s,
859 		unsigned long *sz_filter_passed)
860 {
861 	struct damos_va_stat_private priv;
862 	struct mm_struct *mm;
863 	struct mm_walk_ops walk_ops = {
864 		.pmd_entry = damos_va_stat_pmd_entry,
865 		.walk_lock = PGWALK_RDLOCK,
866 	};
867 
868 	priv.scheme = s;
869 	priv.sz_filter_passed = sz_filter_passed;
870 
871 	if (!damos_ops_has_filter(s))
872 		return 0;
873 
874 	mm = damon_get_mm(target);
875 	if (!mm)
876 		return 0;
877 
878 	mmap_read_lock(mm);
879 	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
880 	mmap_read_unlock(mm);
881 	mmput(mm);
882 	return 0;
883 }
884 
885 static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
886 		struct damon_target *t, struct damon_region *r,
887 		struct damos *scheme, unsigned long *sz_filter_passed)
888 {
889 	int madv_action;
890 
891 	switch (scheme->action) {
892 	case DAMOS_WILLNEED:
893 		madv_action = MADV_WILLNEED;
894 		break;
895 	case DAMOS_COLD:
896 		madv_action = MADV_COLD;
897 		break;
898 	case DAMOS_PAGEOUT:
899 		madv_action = MADV_PAGEOUT;
900 		break;
901 	case DAMOS_HUGEPAGE:
902 		madv_action = MADV_HUGEPAGE;
903 		break;
904 	case DAMOS_NOHUGEPAGE:
905 		madv_action = MADV_NOHUGEPAGE;
906 		break;
907 	case DAMOS_COLLAPSE:
908 		madv_action = MADV_COLLAPSE;
909 		break;
910 	case DAMOS_MIGRATE_HOT:
911 	case DAMOS_MIGRATE_COLD:
912 		return damos_va_migrate(t, r, scheme, sz_filter_passed);
913 	case DAMOS_STAT:
914 		return damos_va_stat(t, r, scheme, sz_filter_passed);
915 	default:
916 		/*
917 		 * DAMOS actions that are not yet supported by 'vaddr'.
918 		 */
919 		return 0;
920 	}
921 
922 	return damos_madvise(t, r, madv_action);
923 }
924 
925 static int damon_va_scheme_score(struct damon_ctx *context,
926 		struct damon_region *r, struct damos *scheme)
927 {
928 
929 	switch (scheme->action) {
930 	case DAMOS_PAGEOUT:
931 		return damon_cold_score(context, r, scheme);
932 	case DAMOS_MIGRATE_HOT:
933 		return damon_hot_score(context, r, scheme);
934 	case DAMOS_MIGRATE_COLD:
935 		return damon_cold_score(context, r, scheme);
936 	default:
937 		break;
938 	}
939 
940 	return DAMOS_MAX_SCORE;
941 }
942 
943 static int __init damon_va_initcall(void)
944 {
945 	struct damon_operations ops = {
946 		.id = DAMON_OPS_VADDR,
947 		.init = damon_va_init,
948 		.update = damon_va_update,
949 		.prepare_access_checks = damon_va_prepare_access_checks,
950 		.check_accesses = damon_va_check_accesses,
951 		.target_valid = damon_va_target_valid,
952 		.cleanup_target = damon_va_cleanup_target,
953 		.apply_scheme = damon_va_apply_scheme,
954 		.get_scheme_score = damon_va_scheme_score,
955 	};
956 	/* ops for fixed virtual address ranges */
957 	struct damon_operations ops_fvaddr = ops;
958 	int err;
959 
960 	/* Don't set the monitoring target regions for the entire mapping */
961 	ops_fvaddr.id = DAMON_OPS_FVADDR;
962 	ops_fvaddr.init = NULL;
963 	ops_fvaddr.update = NULL;
964 
965 	err = damon_register_ops(&ops);
966 	if (err)
967 		return err;
968 	return damon_register_ops(&ops_fvaddr);
969 };
970 
971 subsys_initcall(damon_va_initcall);
972 
973 #include "tests/vaddr-kunit.h"
974