// SPDX-License-Identifier: GPL-2.0
/*
 * Common Code for Data Access Monitoring
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#include <linux/migrate.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/leafops.h>

#include "../internal.h"
#include "ops-common.h"

/*
 * Get an online folio for a pfn if it's in the LRU list.  Otherwise, return
 * NULL.
 *
 * The body of this function is stolen from 'page_idle_get_folio()'.  We
 * steal rather than reuse it because the code is quite simple.
 */
struct folio *damon_get_folio(unsigned long pfn)
{
	struct page *page = pfn_to_online_page(pfn);
	struct folio *folio;

	if (!page)
		return NULL;

	folio = page_folio(page);
	if (!folio_test_lru(folio) || !folio_try_get(folio))
		return NULL;
	if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
		folio_put(folio);
		folio = NULL;
	}
	return folio;
}

void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
{
	pte_t pteval = ptep_get(pte);
	struct folio *folio;
	bool young = false;
	unsigned long pfn;

	if (likely(pte_present(pteval)))
		pfn = pte_pfn(pteval);
	else
		pfn = softleaf_to_pfn(softleaf_from_pte(pteval));

	folio = damon_get_folio(pfn);
	if (!folio)
		return;

	/*
	 * PFN swap PTEs, such as device-exclusive ones, that actually map
	 * pages are "old" from a CPU perspective.  The MMU notifier takes
	 * care of any device aspects.
	 */
	if (likely(pte_present(pteval)))
		young |= ptep_test_and_clear_young(vma, addr, pte);
	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
	if (young)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}

void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	pmd_t pmdval = pmdp_get(pmd);
	struct folio *folio;
	bool young = false;
	unsigned long pfn;

	if (likely(pmd_present(pmdval)))
		pfn = pmd_pfn(pmdval);
	else
		pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval));

	folio = damon_get_folio(pfn);
	if (!folio)
		return;

	if (likely(pmd_present(pmdval)))
		young |= pmdp_clear_young_notify(vma, addr, pmd);
	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE);
	if (young)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}

#define DAMON_MAX_SUBSCORE	(100)
#define DAMON_MAX_AGE_IN_LOG	(32)
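
/*
 * Compute a hotness score for @r in [0, DAMOS_MAX_SCORE], for DAMOS quota
 * prioritization.  The score combines a frequency subscore (r->nr_accesses
 * relative to the maximum observable access count) with an age subscore
 * (roughly log2 of the region's age in seconds), weighted by the scheme's
 * quota weights.
 *
 * Illustrative example (numbers are made up, not defaults): if the frequency
 * subscore works out to 40 and the age subscore to 80, with weights of 1 and
 * 3 respectively, the combined score is (1 * 40 + 3 * 80) / 4 = 70 on the
 * [0, DAMON_MAX_SUBSCORE] scale, which is then rescaled to
 * [0, DAMOS_MAX_SCORE].
 */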
int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int freq_subscore;
	unsigned int age_in_sec;
	int age_in_log, age_subscore;
	unsigned int freq_weight = s->quota.weight_nr_accesses;
	unsigned int age_weight = s->quota.weight_age;
	int hotness;

	freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
		damon_max_nr_accesses(&c->attrs);

	age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
	for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
			age_in_log++, age_in_sec >>= 1)
		;

	/* If frequency is 0, higher age means it's colder */
	if (freq_subscore == 0)
		age_in_log *= -1;

	/*
	 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
	 * Scale it to be in [0, 100] and set it as age subscore.
	 */
	age_in_log += DAMON_MAX_AGE_IN_LOG;
	age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
		DAMON_MAX_AGE_IN_LOG / 2;

	hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
	if (freq_weight + age_weight)
		hotness /= freq_weight + age_weight;
	/*
	 * Transform it to fit in [0, DAMOS_MAX_SCORE]
	 */
	hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;

	return hotness;
}

int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int hotness = damon_hot_score(c, r, s);

	/* Return coldness of the region */
	return DAMOS_MAX_SCORE - hotness;
}

static bool damon_folio_mkold_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte)
			damon_ptep_mkold(pvmw.pte, vma, addr);
		else
			damon_pmdp_mkold(pvmw.pmd, vma, addr);
	}
	return true;
}

void damon_folio_mkold(struct folio *folio)
{
	struct rmap_walk_control rwc = {
		.rmap_one = damon_folio_mkold_one,
		.anon_lock = folio_lock_anon_vma_read,
	};

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		folio_set_idle(folio);
		return;
	}

	if (!folio_trylock(folio))
		return;

	rmap_walk(folio, &rwc);
	folio_unlock(folio);
}
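
/*
 * rmap_walk() callback used by damon_folio_young().  For each mapping of the
 * folio, record in *arg (a bool) whether the mapping's accessed bit is set,
 * the folio is not idle, or a secondary MMU reports the address as recently
 * accessed, and stop the walk as soon as an access is found.
 */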
static bool damon_folio_young_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	bool *accessed = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
	pte_t pte;

	*accessed = false;
	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte) {
			pte = ptep_get(pvmw.pte);

			/*
			 * PFN swap PTEs, such as device-exclusive ones, that
			 * actually map pages are "old" from a CPU perspective.
			 * The MMU notifier takes care of any device aspects.
			 */
			*accessed = (pte_present(pte) && pte_young(pte)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			pmd_t pmd = pmdp_get(pvmw.pmd);

			*accessed = (pmd_present(pmd) && pmd_young(pmd)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
#else
			WARN_ON_ONCE(1);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
		}
		if (*accessed) {
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
	}

	/* If accessed, stop walking */
	return *accessed == false;
}

bool damon_folio_young(struct folio *folio)
{
	bool accessed = false;
	struct rmap_walk_control rwc = {
		.arg = &accessed,
		.rmap_one = damon_folio_young_one,
		.anon_lock = folio_lock_anon_vma_read,
	};

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		if (folio_test_idle(folio))
			return false;
		else
			return true;
	}

	if (!folio_trylock(folio))
		return false;

	rmap_walk(folio, &rwc);
	folio_unlock(folio);

	return accessed;
}

bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio)
{
	bool matched = false;
	struct mem_cgroup *memcg;
	size_t folio_sz;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_ANON:
		matched = folio_test_anon(folio);
		break;
	case DAMOS_FILTER_TYPE_ACTIVE:
		matched = folio_test_active(folio);
		break;
	case DAMOS_FILTER_TYPE_MEMCG:
		rcu_read_lock();
		memcg = folio_memcg_check(folio);
		if (!memcg)
			matched = false;
		else
			matched = filter->memcg_id == mem_cgroup_id(memcg);
		rcu_read_unlock();
		break;
	case DAMOS_FILTER_TYPE_YOUNG:
		matched = damon_folio_young(folio);
		if (matched)
			damon_folio_mkold(folio);
		break;
	case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
		folio_sz = folio_size(folio);
		matched = filter->sz_range.min <= folio_sz &&
			folio_sz <= filter->sz_range.max;
		break;
	case DAMOS_FILTER_TYPE_UNMAPPED:
		matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
		break;
	default:
		break;
	}

	return matched == filter->matching;
}
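
/*
 * Migrate the folios in @migrate_folios to @target_nid via asynchronous
 * migration.  Target-node allocation is best-effort (no reclaim, no use of
 * memory reserves), so some or all folios may fail to migrate under memory
 * pressure.  Does nothing if the folios already live on @target_nid or if
 * @target_nid is NUMA_NO_NODE.  Returns the number of successful migrations
 * as reported by migrate_pages().
 */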
static unsigned int __damon_migrate_folio_list(
		struct list_head *migrate_folios, struct pglist_data *pgdat,
		int target_nid)
{
	unsigned int nr_succeeded = 0;
	struct migration_target_control mtc = {
		/*
		 * Allocate from the target node, or fail quickly and quietly.
		 * When this happens, the folio will likely just be put back
		 * on the LRU instead of migrated.
		 */
		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
			__GFP_NOMEMALLOC | GFP_NOWAIT,
		.nid = target_nid,
	};

	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
		return 0;

	if (list_empty(migrate_folios))
		return 0;

	/* Migration ignores all cpuset and mempolicy settings */
	migrate_pages(migrate_folios, alloc_migration_target, NULL,
		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
		      &nr_succeeded);

	return nr_succeeded;
}

static unsigned int damon_migrate_folio_list(struct list_head *folio_list,
					     struct pglist_data *pgdat,
					     int target_nid)
{
	unsigned int nr_migrated = 0;
	struct folio *folio;
	LIST_HEAD(ret_folios);
	LIST_HEAD(migrate_folios);

	while (!list_empty(folio_list)) {
		struct folio *folio;

		cond_resched();

		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);

		if (!folio_trylock(folio))
			goto keep;

		/* Relocate its contents to another node. */
		list_add(&folio->lru, &migrate_folios);
		folio_unlock(folio);
		continue;
keep:
		list_add(&folio->lru, &ret_folios);
	}
	/* 'folio_list' is always empty here */

	/* Migrate folios selected for migration */
	nr_migrated += __damon_migrate_folio_list(
			&migrate_folios, pgdat, target_nid);
	/*
	 * Folios that could not be migrated are still in @migrate_folios.
	 * Add those back to @folio_list.
	 */
	if (!list_empty(&migrate_folios))
		list_splice_init(&migrate_folios, folio_list);

	try_to_unmap_flush();

	list_splice(&ret_folios, folio_list);

	while (!list_empty(folio_list)) {
		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);
		folio_putback_lru(folio);
	}

	return nr_migrated;
}

unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
{
	int nid;
	unsigned long nr_migrated = 0;
	LIST_HEAD(node_folio_list);
	unsigned int noreclaim_flag;

	if (list_empty(folio_list))
		return nr_migrated;

	if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
			!node_state(target_nid, N_MEMORY))
		return nr_migrated;

	noreclaim_flag = memalloc_noreclaim_save();

	nid = folio_nid(lru_to_folio(folio_list));
	do {
		struct folio *folio = lru_to_folio(folio_list);

		if (nid == folio_nid(folio)) {
			list_move(&folio->lru, &node_folio_list);
			continue;
		}

		nr_migrated += damon_migrate_folio_list(&node_folio_list,
							NODE_DATA(nid),
							target_nid);
		nid = folio_nid(lru_to_folio(folio_list));
	} while (!list_empty(folio_list));

	nr_migrated += damon_migrate_folio_list(&node_folio_list,
						NODE_DATA(nid),
						target_nid);

	memalloc_noreclaim_restore(noreclaim_flag);

	return nr_migrated;
}

bool damos_ops_has_filter(struct damos *s)
{
	struct damos_filter *f;

	damos_for_each_ops_filter(f, s)
		return true;
	return false;
}