// SPDX-License-Identifier: GPL-2.0
/*
 * Common Code for Data Access Monitoring
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#include <linux/migrate.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/leafops.h>

#include "../internal.h"
#include "ops-common.h"

/*
 * Get the folio for a pfn if the page is online and on the LRU list.
 * Otherwise, returns NULL.
 *
 * The body of this function is stolen from 'page_idle_get_folio()'.  We
 * steal rather than reuse it because the code is quite simple.
 */
struct folio *damon_get_folio(unsigned long pfn)
{
	struct page *page = pfn_to_online_page(pfn);
	struct folio *folio;

	if (!page)
		return NULL;

	folio = page_folio(page);
	if (!folio_test_lru(folio) || !folio_try_get(folio))
		return NULL;
	if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
		folio_put(folio);
		folio = NULL;
	}
	return folio;
}

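/*
 * Clear the accessed ("young") bit of the PTE at @addr in @vma, and record
 * whether it was set in the mapped folio's "young" page flag so the
 * information is not lost.  The folio is also marked idle for the page_idle
 * based access tracking.
 */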
void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
{
	pte_t pteval = ptep_get(pte);
	struct folio *folio;
	bool young = false;
	unsigned long pfn;

	if (likely(pte_present(pteval)))
		pfn = pte_pfn(pteval);
	else
		pfn = softleaf_to_pfn(softleaf_from_pte(pteval));

	folio = damon_get_folio(pfn);
	if (!folio)
		return;

	/*
	 * PFN swap PTEs, such as device-exclusive ones, that actually map pages
	 * are "old" from a CPU perspective. The MMU notifier takes care of any
	 * device aspects.
	 */
	if (likely(pte_present(pteval)))
		young |= ptep_test_and_clear_young(vma, addr, pte);
	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
	if (young)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}

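/*
 * PMD-level counterpart of damon_ptep_mkold(): clear the accessed bit of the
 * PMD mapping at @addr and record the old state in the mapped folio's "young"
 * and "idle" page flags.  A no-op when CONFIG_TRANSPARENT_HUGEPAGE is not
 * set.
 */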
void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	pmd_t pmdval = pmdp_get(pmd);
	struct folio *folio;
	bool young = false;
	unsigned long pfn;

	if (likely(pmd_present(pmdval)))
		pfn = pmd_pfn(pmdval);
	else
		pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval));

	folio = damon_get_folio(pfn);
	if (!folio)
		return;

	if (likely(pmd_present(pmdval)))
		young |= pmdp_clear_young_notify(vma, addr, pmd);
	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE);
	if (young)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}

#define DAMON_MAX_SUBSCORE	(100)
#define DAMON_MAX_AGE_IN_LOG	(32)

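/*
 * Compute how hot a region is for a scheme, in [0, DAMOS_MAX_SCORE].
 *
 * The score combines two subscores in [0, DAMON_MAX_SUBSCORE]: one for the
 * access frequency and one for the log-scaled age of the region, weighted by
 * the scheme's quota weights, and then rescales the weighted average to
 * [0, DAMOS_MAX_SCORE].
 *
 * Illustrative example (numbers are made up, not from any real workload):
 * with nr_accesses at half of the maximum, freq_subscore is 50.  If the
 * region has kept that access frequency for about 8 seconds, age_in_log is 4,
 * so age_subscore is (4 + 32) * 100 / 32 / 2 = 56.  With equal weights the
 * weighted average is 53, which is then scaled by
 * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE.
 */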
int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int freq_subscore;
	unsigned int age_in_sec;
	int age_in_log, age_subscore;
	unsigned int freq_weight = s->quota.weight_nr_accesses;
	unsigned int age_weight = s->quota.weight_age;
	int hotness;

	freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
		damon_max_nr_accesses(&c->attrs);

	age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
	for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
			age_in_log++, age_in_sec >>= 1)
		;

	/* If frequency is 0, higher age means it's colder */
	if (freq_subscore == 0)
		age_in_log *= -1;

	/*
	 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
	 * Scale it to be in [0, 100] and set it as age subscore.
	 */
	age_in_log += DAMON_MAX_AGE_IN_LOG;
	age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
		DAMON_MAX_AGE_IN_LOG / 2;

	hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
	if (freq_weight + age_weight)
		hotness /= freq_weight + age_weight;
	/*
	 * Transform it to fit in [0, DAMOS_MAX_SCORE]
	 */
	hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;

	return hotness;
}

int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int hotness = damon_hot_score(c, r, s);

	/* Return coldness of the region */
	return DAMOS_MAX_SCORE - hotness;
}

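/*
 * rmap walk callback for damon_folio_mkold(): clear the accessed bits of all
 * page table entries mapping @folio within @vma.  Always returns true so that
 * the rmap walk continues to the remaining mappings.
 */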
static bool damon_folio_mkold_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte)
			damon_ptep_mkold(pvmw.pte, vma, addr);
		else
			damon_pmdp_mkold(pvmw.pmd, vma, addr);
	}
	return true;
}

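/*
 * Clear the accessed bits of every mapping of @folio via a reverse mapping
 * walk.  Folios that are not mapped, or have no reverse mapping, are simply
 * marked idle.  The walk is skipped if the folio lock cannot be taken without
 * blocking.
 */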
void damon_folio_mkold(struct folio *folio)
{
	struct rmap_walk_control rwc = {
		.rmap_one = damon_folio_mkold_one,
		.anon_lock = folio_lock_anon_vma_read,
	};

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		folio_set_idle(folio);
		return;
	}

	if (!folio_trylock(folio))
		return;

	rmap_walk(folio, &rwc);
	folio_unlock(folio);
}

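/*
 * rmap walk callback for damon_folio_young(): check whether any page table
 * entry mapping @folio within @vma has been accessed.  The result is stored
 * in the bool pointed to by @arg, and the walk is stopped early once an
 * access is found.
 */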
static bool damon_folio_young_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	bool *accessed = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
	pte_t pte;

	*accessed = false;
	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte) {
			pte = ptep_get(pvmw.pte);

			/*
			 * PFN swap PTEs, such as device-exclusive ones, that
			 * actually map pages are "old" from a CPU perspective.
			 * The MMU notifier takes care of any device aspects.
			 */
			*accessed = (pte_present(pte) && pte_young(pte)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			pmd_t pmd = pmdp_get(pvmw.pmd);

			*accessed = (pmd_present(pmd) && pmd_young(pmd)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
#else
			WARN_ON_ONCE(1);
#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
		}
		if (*accessed) {
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
	}

	/* If accessed, stop walking */
	return *accessed == false;
}

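/*
 * Check whether @folio has been accessed since its accessed bits were last
 * cleared, by walking its reverse mappings.  For folios that are not mapped,
 * or have no reverse mapping, fall back to the page_idle "idle" flag.
 * Returns false if the folio lock cannot be taken without blocking.
 */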
bool damon_folio_young(struct folio *folio)
{
	bool accessed = false;
	struct rmap_walk_control rwc = {
		.arg = &accessed,
		.rmap_one = damon_folio_young_one,
		.anon_lock = folio_lock_anon_vma_read,
	};

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		if (folio_test_idle(folio))
			return false;
		else
			return true;
	}

	if (!folio_trylock(folio))
		return false;

	rmap_walk(folio, &rwc);
	folio_unlock(folio);

	return accessed;
}

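/*
 * Check whether @folio satisfies the type-specific condition of @filter,
 * e.g. anonymity, active or young state, memcg membership, folio size, or
 * mappedness.  The result is then compared against the filter's 'matching'
 * setting, so the return value tells whether the filter matches the folio.
 */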
bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio)
{
	bool matched = false;
	struct mem_cgroup *memcg;
	size_t folio_sz;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_ANON:
		matched = folio_test_anon(folio);
		break;
	case DAMOS_FILTER_TYPE_ACTIVE:
		matched = folio_test_active(folio);
		break;
	case DAMOS_FILTER_TYPE_MEMCG:
		rcu_read_lock();
		memcg = folio_memcg_check(folio);
		if (!memcg)
			matched = false;
		else
			matched = filter->memcg_id == mem_cgroup_id(memcg);
		rcu_read_unlock();
		break;
	case DAMOS_FILTER_TYPE_YOUNG:
		matched = damon_folio_young(folio);
		if (matched)
			damon_folio_mkold(folio);
		break;
	case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
		folio_sz = folio_size(folio);
		matched = filter->sz_range.min <= folio_sz &&
			  folio_sz <= filter->sz_range.max;
		break;
	case DAMOS_FILTER_TYPE_UNMAPPED:
		matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
		break;
	default:
		break;
	}

	return matched == filter->matching;
}

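/*
 * Migrate the folios on @migrate_folios to @target_nid and return the number
 * of successfully migrated folios.  Folios that could not be migrated are
 * left on the list for the caller to handle.
 */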
static unsigned int __damon_migrate_folio_list(
		struct list_head *migrate_folios, struct pglist_data *pgdat,
		int target_nid)
{
	unsigned int nr_succeeded = 0;
	struct migration_target_control mtc = {
		/*
		 * Allocate from the target node, or fail quickly and quietly.
		 * When this happens, the folio will simply stay on its current
		 * node instead of being migrated.
		 */
		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
			__GFP_NOMEMALLOC | GFP_NOWAIT,
		.nid = target_nid,
	};

	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
		return 0;

	if (list_empty(migrate_folios))
		return 0;

	/* Migration ignores all cpuset and mempolicy settings */
	migrate_pages(migrate_folios, alloc_migration_target, NULL,
		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
		      &nr_succeeded);

	return nr_succeeded;
}

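/*
 * Migrate the folios on @folio_list, which all belong to the node of @pgdat,
 * to @target_nid.  Folios that are not migrated are put back on their LRU
 * lists.  Returns the number of successfully migrated folios.
 */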
static unsigned int damon_migrate_folio_list(struct list_head *folio_list,
					     struct pglist_data *pgdat,
					     int target_nid)
{
	unsigned int nr_migrated = 0;
	struct folio *folio;
	LIST_HEAD(ret_folios);
	LIST_HEAD(migrate_folios);

	while (!list_empty(folio_list)) {
		struct folio *folio;

		cond_resched();

		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);

		if (!folio_trylock(folio))
			goto keep;

		/* Relocate its contents to another node. */
		list_add(&folio->lru, &migrate_folios);
		folio_unlock(folio);
		continue;
keep:
		list_add(&folio->lru, &ret_folios);
	}
	/* 'folio_list' is always empty here */

	/* Migrate folios selected for migration */
	nr_migrated += __damon_migrate_folio_list(
			&migrate_folios, pgdat, target_nid);
	/*
	 * Folios that could not be migrated are still in @migrate_folios. Add
	 * those back on @folio_list
	 */
	if (!list_empty(&migrate_folios))
		list_splice_init(&migrate_folios, folio_list);

	try_to_unmap_flush();

	list_splice(&ret_folios, folio_list);

	while (!list_empty(folio_list)) {
		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);
		folio_putback_lru(folio);
	}

	return nr_migrated;
}

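/*
 * Migrate the folios on @folio_list to @target_nid, batching them per source
 * node, and return the total number of migrated folios.  Invalid or
 * memory-less target nodes are rejected up front, and memory reclaim is
 * disabled around the migration so allocations on this path do not recurse
 * into reclaim.
 *
 * Hypothetical usage sketch (for illustration only, not an actual caller):
 *
 *	LIST_HEAD(folios);
 *
 *	// isolate candidate folios onto 'folios', e.g. with
 *	// folio_isolate_lru(), then ask to move them to node 1
 *	nr_moved = damon_migrate_pages(&folios, 1);
 */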
unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
{
	int nid;
	unsigned long nr_migrated = 0;
	LIST_HEAD(node_folio_list);
	unsigned int noreclaim_flag;

	if (list_empty(folio_list))
		return nr_migrated;

	if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
			!node_state(target_nid, N_MEMORY))
		return nr_migrated;

	noreclaim_flag = memalloc_noreclaim_save();

	nid = folio_nid(lru_to_folio(folio_list));
	do {
		struct folio *folio = lru_to_folio(folio_list);

		if (nid == folio_nid(folio)) {
			list_move(&folio->lru, &node_folio_list);
			continue;
		}

		nr_migrated += damon_migrate_folio_list(&node_folio_list,
							NODE_DATA(nid),
							target_nid);
		nid = folio_nid(lru_to_folio(folio_list));
	} while (!list_empty(folio_list));

	nr_migrated += damon_migrate_folio_list(&node_folio_list,
						NODE_DATA(nid),
						target_nid);

	memalloc_noreclaim_restore(noreclaim_flag);

	return nr_migrated;
}

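/*
 * Return whether @s has at least one operations layer handled filter.
 */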
bool damos_ops_has_filter(struct damos *s)
{
	struct damos_filter *f;

	damos_for_each_ops_filter(f, s)
		return true;
	return false;
}