xref: /linux/mm/damon/paddr.c (revision a58f3dcf20ea9e7e968ee8369fd782bbb53dff73)
// SPDX-License-Identifier: GPL-2.0
/*
 * DAMON Primitives for The Physical Address Space
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon-pa: " fmt

#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/memory-tiers.h>
#include <linux/migrate.h>
#include <linux/mm_inline.h>

#include "../internal.h"
#include "ops-common.h"

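/*
 * Access monitoring of the physical address space is done in two steps.  At
 * the start of each sampling interval, the Accessed bits of the page table
 * entries mapping the folio that contains each region's sampling address are
 * cleared ("mkold").  At the next sampling interval, the bits are read back
 * ("young") to tell whether the folio was accessed in between.  Both steps
 * visit every mapping of the folio via an rmap walk.
 */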
static bool damon_folio_mkold_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte)
			damon_ptep_mkold(pvmw.pte, vma, addr);
		else
			damon_pmdp_mkold(pvmw.pmd, vma, addr);
	}
	return true;
}

static void damon_folio_mkold(struct folio *folio)
{
	struct rmap_walk_control rwc = {
		.rmap_one = damon_folio_mkold_one,
		.anon_lock = folio_lock_anon_vma_read,
	};
	bool need_lock;

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		folio_set_idle(folio);
		return;
	}

	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
	if (need_lock && !folio_trylock(folio))
		return;

	rmap_walk(folio, &rwc);

	if (need_lock)
		folio_unlock(folio);
}

static void damon_pa_mkold(unsigned long paddr)
{
	struct folio *folio = damon_get_folio(PHYS_PFN(paddr));

	if (!folio)
		return;

	damon_folio_mkold(folio);
	folio_put(folio);
}

static void __damon_pa_prepare_access_check(struct damon_region *r)
{
	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);

	damon_pa_mkold(r->sampling_addr);
}

static void damon_pa_prepare_access_checks(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			__damon_pa_prepare_access_check(r);
	}
}

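/*
 * rmap callback for the "young" check.  A folio is considered accessed if
 * any of its mappings has the Accessed bit set, if the folio's idle flag has
 * been cleared, or if an MMU notifier (i.e., a secondary MMU) reports it as
 * young.  The walk is stopped at the first mapping that indicates an access.
 */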
static bool damon_folio_young_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	bool *accessed = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
	pte_t pte;

	*accessed = false;
	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte) {
			pte = ptep_get(pvmw.pte);

			/*
			 * PFN swap PTEs, such as device-exclusive ones, that
			 * actually map pages are "old" from a CPU perspective.
			 * The MMU notifier takes care of any device aspects.
			 */
			*accessed = (pte_present(pte) && pte_young(pte)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			*accessed = pmd_young(pmdp_get(pvmw.pmd)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
#else
			WARN_ON_ONCE(1);
#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
		}
		if (*accessed) {
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
	}

	/* If accessed, stop walking */
	return *accessed == false;
}

static bool damon_folio_young(struct folio *folio)
{
	bool accessed = false;
	struct rmap_walk_control rwc = {
		.arg = &accessed,
		.rmap_one = damon_folio_young_one,
		.anon_lock = folio_lock_anon_vma_read,
	};
	bool need_lock;

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		if (folio_test_idle(folio))
			return false;
		else
			return true;
	}

	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
	if (need_lock && !folio_trylock(folio))
		return false;

	rmap_walk(folio, &rwc);

	if (need_lock)
		folio_unlock(folio);

	return accessed;
}

static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz)
{
	struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
	bool accessed;

	if (!folio)
		return false;

	accessed = damon_folio_young(folio);
	*folio_sz = folio_size(folio);
	folio_put(folio);
	return accessed;
}

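/*
 * Check the access to the folio containing r->sampling_addr and update the
 * region's access rate accordingly.  Since a single large folio can span the
 * sampling addresses of multiple regions, the result of the last check is
 * cached and reused when the current sampling address falls into the same
 * folio.
 */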
static void __damon_pa_check_access(struct damon_region *r,
		struct damon_attrs *attrs)
{
	static unsigned long last_addr;
	static unsigned long last_folio_sz = PAGE_SIZE;
	static bool last_accessed;

	/* If the region is in the last checked page, reuse the result */
	if (ALIGN_DOWN(last_addr, last_folio_sz) ==
				ALIGN_DOWN(r->sampling_addr, last_folio_sz)) {
		damon_update_region_access_rate(r, last_accessed, attrs);
		return;
	}

	last_accessed = damon_pa_young(r->sampling_addr, &last_folio_sz);
	damon_update_region_access_rate(r, last_accessed, attrs);

	last_addr = r->sampling_addr;
}

static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned int max_nr_accesses = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t) {
			__damon_pa_check_access(r, &ctx->attrs);
			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
		}
	}

	return max_nr_accesses;
}

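/*
 * Return whether @folio matches @filter.  Each filter type tests a different
 * folio property: anonymity, the owning memcg, whether the folio was
 * recently accessed (the YOUNG check also clears the Accessed bits so that
 * later checks stay meaningful), the folio size, or whether the folio is
 * unmapped.  The raw result is compared against filter->matching, so a
 * filter can match either the folios that have the property or those that
 * do not.
 */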
static bool damos_pa_filter_match(struct damos_filter *filter,
		struct folio *folio)
{
	bool matched = false;
	struct mem_cgroup *memcg;
	size_t folio_sz;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_ANON:
		matched = folio_test_anon(folio);
		break;
	case DAMOS_FILTER_TYPE_MEMCG:
		rcu_read_lock();
		memcg = folio_memcg_check(folio);
		if (!memcg)
			matched = false;
		else
			matched = filter->memcg_id == mem_cgroup_id(memcg);
		rcu_read_unlock();
		break;
	case DAMOS_FILTER_TYPE_YOUNG:
		matched = damon_folio_young(folio);
		if (matched)
			damon_folio_mkold(folio);
		break;
	case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
		folio_sz = folio_size(folio);
		matched = filter->sz_range.min <= folio_sz &&
			  folio_sz <= filter->sz_range.max;
		break;
	case DAMOS_FILTER_TYPE_UNMAPPED:
		matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
		break;
	default:
		break;
	}

	return matched == filter->matching;
}

/*
 * damos_pa_filter_out - Return true if the folio should be filtered out.
 *
 * The first filter that matches the folio decides: the folio is rejected if
 * that filter's 'allow' is false, and passes otherwise.  Folios that no
 * filter matches pass.  For example, a scheme whose only filter is
 * {type = DAMOS_FILTER_TYPE_ANON, matching = true, allow = false} rejects
 * anonymous folios and passes everything else.  Ops-level filters are not
 * applied when scheme->core_filters_allowed is set.
 */
static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
{
	struct damos_filter *filter;

	if (scheme->core_filters_allowed)
		return false;

	damos_for_each_filter(filter, scheme) {
		if (damos_pa_filter_match(filter, folio))
			return !filter->allow;
	}
	return false;
}

static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s)
{
	if (!folio)
		return true;
	if (folio == s->last_applied) {
		folio_put(folio);
		return true;
	}
	return false;
}

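/*
 * Apply DAMOS_PAGEOUT to the region: isolate the filter-passing folios from
 * their LRU lists and reclaim them via reclaim_pages().  Unless the scheme
 * already has a YOUNG type filter, a temporary "reject young folios" filter
 * is installed first so that folios which were accessed after the region's
 * access pattern was recorded are not paged out; the temporary filter is
 * destroyed once the region has been handled.
 */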
static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	unsigned long addr, applied;
	LIST_HEAD(folio_list);
	bool install_young_filter = true;
	struct damos_filter *filter;
	struct folio *folio;

	/* check access at page level again by default */
	damos_for_each_filter(filter, s) {
		if (filter->type == DAMOS_FILTER_TYPE_YOUNG) {
			install_young_filter = false;
			break;
		}
	}
	if (install_young_filter) {
		filter = damos_new_filter(
				DAMOS_FILTER_TYPE_YOUNG, true, false);
		if (!filter)
			return 0;
		damos_add_filter(s, filter);
	}

	addr = r->ar.start;
	while (addr < r->ar.end) {
		folio = damon_get_folio(PHYS_PFN(addr));
		if (damon_pa_invalid_damos_folio(folio, s)) {
			addr += PAGE_SIZE;
			continue;
		}

		if (damos_pa_filter_out(s, folio))
			goto put_folio;
		else
			*sz_filter_passed += folio_size(folio);

		folio_clear_referenced(folio);
		folio_test_clear_young(folio);
		if (!folio_isolate_lru(folio))
			goto put_folio;
		if (folio_test_unevictable(folio))
			folio_putback_lru(folio);
		else
			list_add(&folio->lru, &folio_list);
put_folio:
		addr += folio_size(folio);
		folio_put(folio);
	}
	if (install_young_filter)
		damos_destroy_filter(filter);
	applied = reclaim_pages(&folio_list);
	cond_resched();
	s->last_applied = folio;
	return applied * PAGE_SIZE;
}

static inline unsigned long damon_pa_mark_accessed_or_deactivate(
		struct damon_region *r, struct damos *s, bool mark_accessed,
		unsigned long *sz_filter_passed)
{
	unsigned long addr, applied = 0;
	struct folio *folio;

	addr = r->ar.start;
	while (addr < r->ar.end) {
		folio = damon_get_folio(PHYS_PFN(addr));
		if (damon_pa_invalid_damos_folio(folio, s)) {
			addr += PAGE_SIZE;
			continue;
		}

		if (damos_pa_filter_out(s, folio))
			goto put_folio;
		else
			*sz_filter_passed += folio_size(folio);

		if (mark_accessed)
			folio_mark_accessed(folio);
		else
			folio_deactivate(folio);
		applied += folio_nr_pages(folio);
put_folio:
		addr += folio_size(folio);
		folio_put(folio);
	}
	s->last_applied = folio;
	return applied * PAGE_SIZE;
}

static unsigned long damon_pa_mark_accessed(struct damon_region *r,
	struct damos *s, unsigned long *sz_filter_passed)
{
	return damon_pa_mark_accessed_or_deactivate(r, s, true,
			sz_filter_passed);
}

static unsigned long damon_pa_deactivate_pages(struct damon_region *r,
	struct damos *s, unsigned long *sz_filter_passed)
{
	return damon_pa_mark_accessed_or_deactivate(r, s, false,
			sz_filter_passed);
}

static unsigned int __damon_pa_migrate_folio_list(
		struct list_head *migrate_folios, struct pglist_data *pgdat,
		int target_nid)
{
	unsigned int nr_succeeded = 0;
	nodemask_t allowed_mask = NODE_MASK_NONE;
	struct migration_target_control mtc = {
		/*
		 * Allocate from 'node', or fail quickly and quietly.
		 * When this happens, 'page' will likely just be discarded
		 * instead of migrated.
		 */
		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
			__GFP_NOWARN | __GFP_NOMEMALLOC | GFP_NOWAIT,
		.nid = target_nid,
		.nmask = &allowed_mask
	};

	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
		return 0;

	if (list_empty(migrate_folios))
		return 0;

	/* Migration ignores all cpuset and mempolicy settings */
	migrate_pages(migrate_folios, alloc_migrate_folio, NULL,
		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
		      &nr_succeeded);

	return nr_succeeded;
}

static unsigned int damon_pa_migrate_folio_list(struct list_head *folio_list,
						struct pglist_data *pgdat,
						int target_nid)
{
	unsigned int nr_migrated = 0;
	struct folio *folio;
	LIST_HEAD(ret_folios);
	LIST_HEAD(migrate_folios);

	while (!list_empty(folio_list)) {
		struct folio *folio;

		cond_resched();

		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);

		if (!folio_trylock(folio))
			goto keep;

		/* Relocate its contents to another node. */
		list_add(&folio->lru, &migrate_folios);
		folio_unlock(folio);
		continue;
keep:
		list_add(&folio->lru, &ret_folios);
	}
	/* 'folio_list' is always empty here */

	/* Migrate folios selected for migration */
	nr_migrated += __damon_pa_migrate_folio_list(
			&migrate_folios, pgdat, target_nid);
	/*
	 * Folios that could not be migrated are still in @migrate_folios.  Add
	 * those back on @folio_list
	 */
	if (!list_empty(&migrate_folios))
		list_splice_init(&migrate_folios, folio_list);

	try_to_unmap_flush();

	list_splice(&ret_folios, folio_list);

	while (!list_empty(folio_list)) {
		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);
		folio_putback_lru(folio);
	}

	return nr_migrated;
}

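/*
 * Migrate the given folios to @target_nid.  The input list can contain
 * folios from multiple source nodes, so it is consumed in per-node batches:
 * folios of the currently handled node are collected on a local list and
 * migrated together before the folios of the next node are handled.  Folios
 * that were not migrated are put back on their LRU lists.
 */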
static unsigned long damon_pa_migrate_pages(struct list_head *folio_list,
					    int target_nid)
{
	int nid;
	unsigned long nr_migrated = 0;
	LIST_HEAD(node_folio_list);
	unsigned int noreclaim_flag;

	if (list_empty(folio_list))
		return nr_migrated;

	noreclaim_flag = memalloc_noreclaim_save();

	nid = folio_nid(lru_to_folio(folio_list));
	do {
		struct folio *folio = lru_to_folio(folio_list);

		if (nid == folio_nid(folio)) {
			list_move(&folio->lru, &node_folio_list);
			continue;
		}

		nr_migrated += damon_pa_migrate_folio_list(&node_folio_list,
							   NODE_DATA(nid),
							   target_nid);
		nid = folio_nid(lru_to_folio(folio_list));
	} while (!list_empty(folio_list));

	nr_migrated += damon_pa_migrate_folio_list(&node_folio_list,
						   NODE_DATA(nid),
						   target_nid);

	memalloc_noreclaim_restore(noreclaim_flag);

	return nr_migrated;
}

static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	unsigned long addr, applied;
	LIST_HEAD(folio_list);
	struct folio *folio;

	addr = r->ar.start;
	while (addr < r->ar.end) {
		folio = damon_get_folio(PHYS_PFN(addr));
		if (damon_pa_invalid_damos_folio(folio, s)) {
			addr += PAGE_SIZE;
			continue;
		}

		if (damos_pa_filter_out(s, folio))
			goto put_folio;
		else
			*sz_filter_passed += folio_size(folio);

		if (!folio_isolate_lru(folio))
			goto put_folio;
		list_add(&folio->lru, &folio_list);
put_folio:
		addr += folio_size(folio);
		folio_put(folio);
	}
	applied = damon_pa_migrate_pages(&folio_list, s->target_nid);
	cond_resched();
	s->last_applied = folio;
	return applied * PAGE_SIZE;
}

static bool damon_pa_scheme_has_filter(struct damos *s)
{
	struct damos_filter *f;

	damos_for_each_filter(f, s)
		return true;
	return false;
}

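/*
 * Apply DAMOS_STAT to the region.  The action itself changes nothing, so the
 * folios are walked only when the scheme has filters, and only to account
 * the bytes that pass the filters into @sz_filter_passed.
 */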
static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	unsigned long addr;
	LIST_HEAD(folio_list);
	struct folio *folio;

	if (!damon_pa_scheme_has_filter(s))
		return 0;

	addr = r->ar.start;
	while (addr < r->ar.end) {
		folio = damon_get_folio(PHYS_PFN(addr));
		if (damon_pa_invalid_damos_folio(folio, s)) {
			addr += PAGE_SIZE;
			continue;
		}

		if (!damos_pa_filter_out(s, folio))
			*sz_filter_passed += folio_size(folio);
		addr += folio_size(folio);
		folio_put(folio);
	}
	s->last_applied = folio;
	return 0;
}

static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme, unsigned long *sz_filter_passed)
{
	switch (scheme->action) {
	case DAMOS_PAGEOUT:
		return damon_pa_pageout(r, scheme, sz_filter_passed);
	case DAMOS_LRU_PRIO:
		return damon_pa_mark_accessed(r, scheme, sz_filter_passed);
	case DAMOS_LRU_DEPRIO:
		return damon_pa_deactivate_pages(r, scheme, sz_filter_passed);
	case DAMOS_MIGRATE_HOT:
	case DAMOS_MIGRATE_COLD:
		return damon_pa_migrate(r, scheme, sz_filter_passed);
	case DAMOS_STAT:
		return damon_pa_stat(r, scheme, sz_filter_passed);
	default:
		/* DAMOS actions that are not yet supported by 'paddr'. */
		break;
	}
	return 0;
}

static int damon_pa_scheme_score(struct damon_ctx *context,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme)
{
	switch (scheme->action) {
	case DAMOS_PAGEOUT:
		return damon_cold_score(context, r, scheme);
	case DAMOS_LRU_PRIO:
		return damon_hot_score(context, r, scheme);
	case DAMOS_LRU_DEPRIO:
		return damon_cold_score(context, r, scheme);
	case DAMOS_MIGRATE_HOT:
		return damon_hot_score(context, r, scheme);
	case DAMOS_MIGRATE_COLD:
		return damon_cold_score(context, r, scheme);
	default:
		break;
	}

	return DAMOS_MAX_SCORE;
}

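/*
 * Register the monitoring operations for the physical address space.
 * Callbacks that the physical address space does not need, including the
 * per-target initialization, update, and validity check hooks, are left
 * NULL.
 */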
static int __init damon_pa_initcall(void)
{
	struct damon_operations ops = {
		.id = DAMON_OPS_PADDR,
		.init = NULL,
		.update = NULL,
		.prepare_access_checks = damon_pa_prepare_access_checks,
		.check_accesses = damon_pa_check_accesses,
		.reset_aggregated = NULL,
		.target_valid = NULL,
		.cleanup = NULL,
		.apply_scheme = damon_pa_apply_scheme,
		.get_scheme_score = damon_pa_scheme_score,
	};

	return damon_register_ops(&ops);
}

subsys_initcall(damon_pa_initcall);