xref: /linux/mm/damon/ops-common.c (revision badfa4361cb116fd9af71aaa2ea470236a8aa25b)
// SPDX-License-Identifier: GPL-2.0
/*
 * Common Code for Data Access Monitoring
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#include <linux/migrate.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "../internal.h"
#include "ops-common.h"

/*
 * Get the folio for an online pfn if it is on the LRU list.  Otherwise,
 * returns NULL.
 *
 * The body of this function is stolen from 'page_idle_get_folio()'.  We
 * steal rather than reuse it because the code is quite simple.
 */
struct folio *damon_get_folio(unsigned long pfn)
{
	struct page *page = pfn_to_online_page(pfn);
	struct folio *folio;

	if (!page)
		return NULL;

	folio = page_folio(page);
	if (!folio_test_lru(folio) || !folio_try_get(folio))
		return NULL;
	/*
	 * Re-check after taking the reference: the page could have been freed
	 * and reused, or taken off the LRU, in the meantime.
	 */
	if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
		folio_put(folio);
		folio = NULL;
	}
	return folio;
}
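
/*
 * Illustrative usage of damon_get_folio() (a sketch, not code from this
 * file): callers look up the folio behind a PFN, operate on it, and then
 * drop the reference that damon_get_folio() took, for example:
 *
 *	struct folio *folio = damon_get_folio(pfn);
 *
 *	if (!folio)
 *		return;
 *	...test or clear access information of the folio...
 *	folio_put(folio);
 */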

void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
{
	pte_t pteval = ptep_get(pte);
	struct folio *folio;
	bool young = false;
	unsigned long pfn;

	if (likely(pte_present(pteval)))
		pfn = pte_pfn(pteval);
	else
		pfn = swp_offset_pfn(pte_to_swp_entry(pteval));

	folio = damon_get_folio(pfn);
	if (!folio)
		return;

	/*
	 * PFN swap PTEs, such as device-exclusive ones, that actually map pages
	 * are "old" from a CPU perspective. The MMU notifier takes care of any
	 * device aspects.
	 */
	if (likely(pte_present(pteval)))
		young |= ptep_test_and_clear_young(vma, addr, pte);
	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
	if (young)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}

void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct folio *folio = damon_get_folio(pmd_pfn(pmdp_get(pmd)));

	if (!folio)
		return;

	if (pmdp_clear_young_notify(vma, addr, pmd))
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}

#define DAMON_MAX_SUBSCORE	(100)
#define DAMON_MAX_AGE_IN_LOG	(32)

int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int freq_subscore;
	unsigned int age_in_sec;
	int age_in_log, age_subscore;
	unsigned int freq_weight = s->quota.weight_nr_accesses;
	unsigned int age_weight = s->quota.weight_age;
	int hotness;

	freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
		damon_max_nr_accesses(&c->attrs);

	age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
	for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
			age_in_log++, age_in_sec >>= 1)
		;

	/* If frequency is 0, higher age means it's colder */
	if (freq_subscore == 0)
		age_in_log *= -1;

	/*
	 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
	 * Scale it to be in [0, 100] and set it as age subscore.
	 */
	age_in_log += DAMON_MAX_AGE_IN_LOG;
	age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
		DAMON_MAX_AGE_IN_LOG / 2;

	hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
	if (freq_weight + age_weight)
		hotness /= freq_weight + age_weight;
	/*
	 * Transform it to fit in [0, DAMOS_MAX_SCORE]
	 */
	hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;

	return hotness;
}
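
/*
 * A rough worked example of damon_hot_score() (illustrative numbers only,
 * assuming an aggregation interval of 100000 us and equal weights of 1):
 *
 *	nr_accesses at the maximum	-> freq_subscore = 100
 *	age of 80 aggregation intervals	-> age_in_sec = 80 * 100000 / 1000000
 *					   = 8, so age_in_log = 4
 *	age_in_log + 32 = 36		-> age_subscore = 36 * 100 / 32 / 2
 *					   = 56 (integer arithmetic)
 *	hotness = (1 * 100 + 1 * 56) / (1 + 1) = 78, then scaled into
 *	[0, DAMOS_MAX_SCORE].
 */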

int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int hotness = damon_hot_score(c, r, s);

	/* Return coldness of the region */
	return DAMOS_MAX_SCORE - hotness;
}

static bool damon_folio_mkold_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte)
			damon_ptep_mkold(pvmw.pte, vma, addr);
		else
			damon_pmdp_mkold(pvmw.pmd, vma, addr);
	}
	return true;
}

void damon_folio_mkold(struct folio *folio)
{
	struct rmap_walk_control rwc = {
		.rmap_one = damon_folio_mkold_one,
		.anon_lock = folio_lock_anon_vma_read,
	};

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		folio_set_idle(folio);
		return;
	}

	if (!folio_trylock(folio))
		return;

	rmap_walk(folio, &rwc);
	folio_unlock(folio);
}

static bool damon_folio_young_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	bool *accessed = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
	pte_t pte;

	*accessed = false;
	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte) {
			pte = ptep_get(pvmw.pte);

			/*
			 * PFN swap PTEs, such as device-exclusive ones, that
			 * actually map pages are "old" from a CPU perspective.
			 * The MMU notifier takes care of any device aspects.
			 */
			*accessed = (pte_present(pte) && pte_young(pte)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			*accessed = pmd_young(pmdp_get(pvmw.pmd)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
#else
			WARN_ON_ONCE(1);
#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
		}
		if (*accessed) {
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
	}

	/* If accessed, stop walking */
	return *accessed == false;
}

bool damon_folio_young(struct folio *folio)
{
	bool accessed = false;
	struct rmap_walk_control rwc = {
		.arg = &accessed,
		.rmap_one = damon_folio_young_one,
		.anon_lock = folio_lock_anon_vma_read,
	};

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		if (folio_test_idle(folio))
			return false;
		else
			return true;
	}

	if (!folio_trylock(folio))
		return false;

	rmap_walk(folio, &rwc);
	folio_unlock(folio);

	return accessed;
}
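
/*
 * A usage note (a sketch of the common pattern, not additional code): callers
 * that want "test and reset" semantics pair the check with a mkold so that
 * the next check starts from a clean state, as the DAMOS_FILTER_TYPE_YOUNG
 * handling below does:
 *
 *	if (damon_folio_young(folio))
 *		damon_folio_mkold(folio);
 */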

bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio)
{
	bool matched = false;
	struct mem_cgroup *memcg;
	size_t folio_sz;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_ANON:
		matched = folio_test_anon(folio);
		break;
	case DAMOS_FILTER_TYPE_ACTIVE:
		matched = folio_test_active(folio);
		break;
	case DAMOS_FILTER_TYPE_MEMCG:
		rcu_read_lock();
		memcg = folio_memcg_check(folio);
		if (!memcg)
			matched = false;
		else
			matched = filter->memcg_id == mem_cgroup_id(memcg);
		rcu_read_unlock();
		break;
	case DAMOS_FILTER_TYPE_YOUNG:
		matched = damon_folio_young(folio);
		if (matched)
			damon_folio_mkold(folio);
		break;
	case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
		folio_sz = folio_size(folio);
		matched = filter->sz_range.min <= folio_sz &&
			  folio_sz <= filter->sz_range.max;
		break;
	case DAMOS_FILTER_TYPE_UNMAPPED:
		matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
		break;
	default:
		break;
	}

	return matched == filter->matching;
}
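
/*
 * The matching semantics above, illustrated (informational example only):
 * the return value is whether the folio's property agrees with what the
 * filter asks for.  For an anon-type filter, for instance:
 *
 *	filter->matching == true,  anonymous folio	-> true
 *	filter->matching == true,  file-backed folio	-> false
 *	filter->matching == false, anonymous folio	-> false
 *	filter->matching == false, file-backed folio	-> true
 */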

static unsigned int __damon_migrate_folio_list(
		struct list_head *migrate_folios, struct pglist_data *pgdat,
		int target_nid)
{
	unsigned int nr_succeeded = 0;
	struct migration_target_control mtc = {
		/*
		 * Allocate from the target node, or fail quickly and quietly.
		 * When this happens, the folio will likely just be put back
		 * on its original LRU list instead of migrated.
		 */
		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
			__GFP_NOMEMALLOC | GFP_NOWAIT,
		.nid = target_nid,
	};

	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
		return 0;

	if (list_empty(migrate_folios))
		return 0;

	/* Migration ignores all cpuset and mempolicy settings */
	migrate_pages(migrate_folios, alloc_migration_target, NULL,
		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
		      &nr_succeeded);

	return nr_succeeded;
}

static unsigned int damon_migrate_folio_list(struct list_head *folio_list,
						struct pglist_data *pgdat,
						int target_nid)
{
	unsigned int nr_migrated = 0;
	struct folio *folio;
	LIST_HEAD(ret_folios);
	LIST_HEAD(migrate_folios);

	while (!list_empty(folio_list)) {
		struct folio *folio;

		cond_resched();

		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);

		if (!folio_trylock(folio))
			goto keep;

		/* Relocate its contents to another node. */
		list_add(&folio->lru, &migrate_folios);
		folio_unlock(folio);
		continue;
keep:
		list_add(&folio->lru, &ret_folios);
	}
	/* 'folio_list' is always empty here */

	/* Migrate folios selected for migration */
	nr_migrated += __damon_migrate_folio_list(
			&migrate_folios, pgdat, target_nid);
	/*
	 * Folios that could not be migrated are still in @migrate_folios.  Add
	 * those back on @folio_list.
	 */
	if (!list_empty(&migrate_folios))
		list_splice_init(&migrate_folios, folio_list);

	try_to_unmap_flush();

	list_splice(&ret_folios, folio_list);

	while (!list_empty(folio_list)) {
		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);
		folio_putback_lru(folio);
	}

	return nr_migrated;
}

unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
{
	int nid;
	unsigned long nr_migrated = 0;
	LIST_HEAD(node_folio_list);
	unsigned int noreclaim_flag;

	if (list_empty(folio_list))
		return nr_migrated;

	if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
			!node_state(target_nid, N_MEMORY))
		return nr_migrated;

	noreclaim_flag = memalloc_noreclaim_save();

	nid = folio_nid(lru_to_folio(folio_list));
	do {
		struct folio *folio = lru_to_folio(folio_list);

		if (nid == folio_nid(folio)) {
			list_move(&folio->lru, &node_folio_list);
			continue;
		}

		nr_migrated += damon_migrate_folio_list(&node_folio_list,
							   NODE_DATA(nid),
							   target_nid);
		nid = folio_nid(lru_to_folio(folio_list));
	} while (!list_empty(folio_list));

	nr_migrated += damon_migrate_folio_list(&node_folio_list,
						   NODE_DATA(nid),
						   target_nid);

	memalloc_noreclaim_restore(noreclaim_flag);

	return nr_migrated;
}
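
/*
 * How the per-node batching above behaves, by example (illustrative only):
 * with folio_list = [A on node 0, B on node 0, C on node 1] and a valid
 * target_nid, the loop first gathers A and B into node_folio_list and
 * migrates them as one batch from node 0, then gathers C and migrates it as
 * a second batch from node 1.  Batching per source node lets each
 * damon_migrate_folio_list() call work against a single pglist_data.
 */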

bool damos_ops_has_filter(struct damos *s)
{
	struct damos_filter *f;

	damos_for_each_ops_filter(f, s)
		return true;
	return false;
}