xref: /linux/mm/damon/ops-common.c (revision 1cbe003b631d905d1b9da10cda6111f4263622e0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Common Code for Data Access Monitoring
4  *
5  * Author: SeongJae Park <sj@kernel.org>
6  */
7 
8 #include <linux/migrate.h>
9 #include <linux/mmu_notifier.h>
10 #include <linux/page_idle.h>
11 #include <linux/pagemap.h>
12 #include <linux/rmap.h>
13 #include <linux/swap.h>
14 #include <linux/leafops.h>
15 
16 #include "../internal.h"
17 #include "ops-common.h"
18 
19 /*
20  * Get an online page for a pfn if it's in the LRU list.  Otherwise, returns
21  * NULL.
22  *
23  * The body of this function is stolen from the 'page_idle_get_folio()'.  We
24  * steal rather than reuse it because the code is quite simple.
25  */
26 struct folio *damon_get_folio(unsigned long pfn)
27 {
28 	struct page *page = pfn_to_online_page(pfn);
29 	struct folio *folio;
30 
31 	if (!page)
32 		return NULL;
33 
34 	folio = page_folio(page);
35 	if (!folio_try_get(folio))
36 		return NULL;
37 	if (unlikely(page_folio(page) != folio) || !folio_test_lru(folio)) {
38 		folio_put(folio);
39 		folio = NULL;
40 	}
41 	return folio;
42 }
43 
44 void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
45 {
46 	pte_t pteval = ptep_get(pte);
47 	struct folio *folio;
48 	bool young = false;
49 	unsigned long pfn;
50 
51 	if (likely(pte_present(pteval)))
52 		pfn = pte_pfn(pteval);
53 	else
54 		pfn = softleaf_to_pfn(softleaf_from_pte(pteval));
55 
56 	folio = damon_get_folio(pfn);
57 	if (!folio)
58 		return;
59 
60 	/*
61 	 * PFN swap PTEs, such as device-exclusive ones, that actually map pages
62 	 * are "old" from a CPU perspective. The MMU notifier takes care of any
63 	 * device aspects.
64 	 */
65 	if (likely(pte_present(pteval)))
66 		young |= ptep_test_and_clear_young(vma, addr, pte);
67 	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
68 	if (young)
69 		folio_set_young(folio);
70 
71 	folio_set_idle(folio);
72 	folio_put(folio);
73 }
74 
75 void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
76 {
77 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
78 	pmd_t pmdval = pmdp_get(pmd);
79 	struct folio *folio;
80 	bool young = false;
81 	unsigned long pfn;
82 
83 	if (likely(pmd_present(pmdval)))
84 		pfn = pmd_pfn(pmdval);
85 	else
86 		pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval));
87 
88 	folio = damon_get_folio(pfn);
89 	if (!folio)
90 		return;
91 
92 	if (likely(pmd_present(pmdval)))
93 		young |= pmdp_test_and_clear_young(vma, addr, pmd);
94 	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE);
95 	if (young)
96 		folio_set_young(folio);
97 
98 	folio_set_idle(folio);
99 	folio_put(folio);
100 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
101 }
102 
103 #define DAMON_MAX_SUBSCORE	(100)
104 #define DAMON_MAX_AGE_IN_LOG	(32)
105 
106 int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
107 			struct damos *s)
108 {
109 	int freq_subscore;
110 	unsigned int age_in_sec;
111 	int age_in_log, age_subscore;
112 	unsigned int freq_weight = s->quota.weight_nr_accesses;
113 	unsigned int age_weight = s->quota.weight_age;
114 	int hotness;
115 
116 	freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
117 		damon_max_nr_accesses(&c->attrs);
118 
119 	age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
120 	if (age_in_sec)
121 		age_in_log = min_t(int, ilog2(age_in_sec) + 1,
122 				DAMON_MAX_AGE_IN_LOG);
123 	else
124 		age_in_log = 0;
125 
126 
127 	/* If frequency is 0, higher age means it's colder */
128 	if (freq_subscore == 0)
129 		age_in_log *= -1;
130 
131 	/*
132 	 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
133 	 * Scale it to be in [0, 100] and set it as age subscore.
134 	 */
135 	age_in_log += DAMON_MAX_AGE_IN_LOG;
136 	age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
137 		DAMON_MAX_AGE_IN_LOG / 2;
138 
139 	hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
140 	if (freq_weight + age_weight)
141 		hotness /= freq_weight + age_weight;
142 	/*
143 	 * Transform it to fit in [0, DAMOS_MAX_SCORE]
144 	 */
145 	hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;
146 
147 	return hotness;
148 }
149 
150 int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
151 			struct damos *s)
152 {
153 	int hotness = damon_hot_score(c, r, s);
154 
155 	/* Return coldness of the region */
156 	return DAMOS_MAX_SCORE - hotness;
157 }
158 
159 static bool damon_folio_mkold_one(struct folio *folio,
160 		struct vm_area_struct *vma, unsigned long addr, void *arg)
161 {
162 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
163 
164 	while (page_vma_mapped_walk(&pvmw)) {
165 		addr = pvmw.address;
166 		if (pvmw.pte)
167 			damon_ptep_mkold(pvmw.pte, vma, addr);
168 		else
169 			damon_pmdp_mkold(pvmw.pmd, vma, addr);
170 	}
171 	return true;
172 }
173 
174 void damon_folio_mkold(struct folio *folio)
175 {
176 	struct rmap_walk_control rwc = {
177 		.rmap_one = damon_folio_mkold_one,
178 		.anon_lock = folio_lock_anon_vma_read,
179 	};
180 
181 	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
182 		folio_set_idle(folio);
183 		return;
184 	}
185 
186 	if (!folio_trylock(folio))
187 		return;
188 
189 	rmap_walk(folio, &rwc);
190 	folio_unlock(folio);
191 
192 }
193 
194 static bool damon_folio_young_one(struct folio *folio,
195 		struct vm_area_struct *vma, unsigned long addr, void *arg)
196 {
197 	bool *accessed = arg;
198 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
199 	pte_t pte;
200 
201 	*accessed = false;
202 	while (page_vma_mapped_walk(&pvmw)) {
203 		addr = pvmw.address;
204 		if (pvmw.pte) {
205 			pte = ptep_get(pvmw.pte);
206 
207 			/*
208 			 * PFN swap PTEs, such as device-exclusive ones, that
209 			 * actually map pages are "old" from a CPU perspective.
210 			 * The MMU notifier takes care of any device aspects.
211 			 */
212 			*accessed = (pte_present(pte) && pte_young(pte)) ||
213 				!folio_test_idle(folio) ||
214 				mmu_notifier_test_young(vma->vm_mm, addr);
215 		} else {
216 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
217 			pmd_t pmd = pmdp_get(pvmw.pmd);
218 
219 			*accessed = (pmd_present(pmd) && pmd_young(pmd)) ||
220 				!folio_test_idle(folio) ||
221 				mmu_notifier_test_young(vma->vm_mm, addr);
222 #else
223 			WARN_ON_ONCE(1);
224 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
225 		}
226 		if (*accessed) {
227 			page_vma_mapped_walk_done(&pvmw);
228 			break;
229 		}
230 	}
231 
232 	/* If accessed, stop walking */
233 	return *accessed == false;
234 }
235 
236 bool damon_folio_young(struct folio *folio)
237 {
238 	bool accessed = false;
239 	struct rmap_walk_control rwc = {
240 		.arg = &accessed,
241 		.rmap_one = damon_folio_young_one,
242 		.anon_lock = folio_lock_anon_vma_read,
243 	};
244 
245 	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
246 		if (folio_test_idle(folio))
247 			return false;
248 		else
249 			return true;
250 	}
251 
252 	if (!folio_trylock(folio))
253 		return false;
254 
255 	rmap_walk(folio, &rwc);
256 	folio_unlock(folio);
257 
258 	return accessed;
259 }
260 
261 bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio)
262 {
263 	bool matched = false;
264 	struct mem_cgroup *memcg;
265 	size_t folio_sz;
266 
267 	switch (filter->type) {
268 	case DAMOS_FILTER_TYPE_ANON:
269 		matched = folio_test_anon(folio);
270 		break;
271 	case DAMOS_FILTER_TYPE_ACTIVE:
272 		matched = folio_test_active(folio);
273 		break;
274 	case DAMOS_FILTER_TYPE_MEMCG:
275 		rcu_read_lock();
276 		memcg = folio_memcg_check(folio);
277 		if (!memcg)
278 			matched = false;
279 		else
280 			matched = filter->memcg_id == mem_cgroup_id(memcg);
281 		rcu_read_unlock();
282 		break;
283 	case DAMOS_FILTER_TYPE_YOUNG:
284 		matched = damon_folio_young(folio);
285 		if (matched)
286 			damon_folio_mkold(folio);
287 		break;
288 	case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
289 		folio_sz = folio_size(folio);
290 		matched = filter->sz_range.min <= folio_sz &&
291 			  folio_sz <= filter->sz_range.max;
292 		break;
293 	case DAMOS_FILTER_TYPE_UNMAPPED:
294 		matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
295 		break;
296 	default:
297 		break;
298 	}
299 
300 	return matched == filter->matching;
301 }
302 
303 static unsigned int __damon_migrate_folio_list(
304 		struct list_head *migrate_folios, struct pglist_data *pgdat,
305 		int target_nid)
306 {
307 	unsigned int nr_succeeded = 0;
308 	struct migration_target_control mtc = {
309 		/*
310 		 * Allocate from 'node', or fail quickly and quietly.
311 		 * When this happens, 'page' will likely just be discarded
312 		 * instead of migrated.
313 		 */
314 		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
315 			__GFP_NOMEMALLOC | GFP_NOWAIT,
316 		.nid = target_nid,
317 	};
318 
319 	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
320 		return 0;
321 
322 	if (list_empty(migrate_folios))
323 		return 0;
324 
325 	/* Migration ignores all cpuset and mempolicy settings */
326 	migrate_pages(migrate_folios, alloc_migration_target, NULL,
327 		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
328 		      &nr_succeeded);
329 
330 	return nr_succeeded;
331 }
332 
333 static unsigned int damon_migrate_folio_list(struct list_head *folio_list,
334 						struct pglist_data *pgdat,
335 						int target_nid)
336 {
337 	unsigned int nr_migrated = 0;
338 	struct folio *folio;
339 	LIST_HEAD(ret_folios);
340 	LIST_HEAD(migrate_folios);
341 
342 	while (!list_empty(folio_list)) {
343 		struct folio *folio;
344 
345 		cond_resched();
346 
347 		folio = lru_to_folio(folio_list);
348 		list_del(&folio->lru);
349 
350 		if (!folio_trylock(folio))
351 			goto keep;
352 
353 		/* Relocate its contents to another node. */
354 		list_add(&folio->lru, &migrate_folios);
355 		folio_unlock(folio);
356 		continue;
357 keep:
358 		list_add(&folio->lru, &ret_folios);
359 	}
360 	/* 'folio_list' is always empty here */
361 
362 	/* Migrate folios selected for migration */
363 	nr_migrated += __damon_migrate_folio_list(
364 			&migrate_folios, pgdat, target_nid);
365 	/*
366 	 * Folios that could not be migrated are still in @migrate_folios.  Add
367 	 * those back on @folio_list
368 	 */
369 	if (!list_empty(&migrate_folios))
370 		list_splice_init(&migrate_folios, folio_list);
371 
372 	try_to_unmap_flush();
373 
374 	list_splice(&ret_folios, folio_list);
375 
376 	while (!list_empty(folio_list)) {
377 		folio = lru_to_folio(folio_list);
378 		list_del(&folio->lru);
379 		folio_putback_lru(folio);
380 	}
381 
382 	return nr_migrated;
383 }
384 
385 unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
386 {
387 	int nid;
388 	unsigned long nr_migrated = 0;
389 	LIST_HEAD(node_folio_list);
390 	unsigned int noreclaim_flag;
391 
392 	if (list_empty(folio_list))
393 		return nr_migrated;
394 
395 	if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
396 			!node_state(target_nid, N_MEMORY))
397 		return nr_migrated;
398 
399 	noreclaim_flag = memalloc_noreclaim_save();
400 
401 	nid = folio_nid(lru_to_folio(folio_list));
402 	do {
403 		struct folio *folio = lru_to_folio(folio_list);
404 
405 		if (nid == folio_nid(folio)) {
406 			list_move(&folio->lru, &node_folio_list);
407 			continue;
408 		}
409 
410 		nr_migrated += damon_migrate_folio_list(&node_folio_list,
411 							   NODE_DATA(nid),
412 							   target_nid);
413 		nid = folio_nid(lru_to_folio(folio_list));
414 	} while (!list_empty(folio_list));
415 
416 	nr_migrated += damon_migrate_folio_list(&node_folio_list,
417 						   NODE_DATA(nid),
418 						   target_nid);
419 
420 	memalloc_noreclaim_restore(noreclaim_flag);
421 
422 	return nr_migrated;
423 }
424 
425 bool damos_ops_has_filter(struct damos *s)
426 {
427 	struct damos_filter *f;
428 
429 	damos_for_each_ops_filter(f, s)
430 		return true;
431 	return false;
432 }
433