1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Common Code for Data Access Monitoring 4 * 5 * Author: SeongJae Park <sj@kernel.org> 6 */ 7 8 #include <linux/migrate.h> 9 #include <linux/mmu_notifier.h> 10 #include <linux/page_idle.h> 11 #include <linux/pagemap.h> 12 #include <linux/rmap.h> 13 #include <linux/swap.h> 14 #include <linux/leafops.h> 15 16 #include "../internal.h" 17 #include "ops-common.h" 18 19 /* 20 * Get an online page for a pfn if it's in the LRU list. Otherwise, returns 21 * NULL. 22 * 23 * The body of this function is stolen from the 'page_idle_get_folio()'. We 24 * steal rather than reuse it because the code is quite simple. 25 */ 26 struct folio *damon_get_folio(unsigned long pfn) 27 { 28 struct page *page = pfn_to_online_page(pfn); 29 struct folio *folio; 30 31 if (!page) 32 return NULL; 33 34 folio = page_folio(page); 35 if (!folio_try_get(folio)) 36 return NULL; 37 if (unlikely(page_folio(page) != folio) || !folio_test_lru(folio)) { 38 folio_put(folio); 39 folio = NULL; 40 } 41 return folio; 42 } 43 44 void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr) 45 { 46 pte_t pteval = ptep_get(pte); 47 struct folio *folio; 48 bool young = false; 49 unsigned long pfn; 50 51 if (likely(pte_present(pteval))) 52 pfn = pte_pfn(pteval); 53 else 54 pfn = softleaf_to_pfn(softleaf_from_pte(pteval)); 55 56 folio = damon_get_folio(pfn); 57 if (!folio) 58 return; 59 60 /* 61 * PFN swap PTEs, such as device-exclusive ones, that actually map pages 62 * are "old" from a CPU perspective. The MMU notifier takes care of any 63 * device aspects. 64 */ 65 if (likely(pte_present(pteval))) 66 young |= ptep_test_and_clear_young(vma, addr, pte); 67 young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE); 68 if (young) 69 folio_set_young(folio); 70 71 folio_set_idle(folio); 72 folio_put(folio); 73 } 74 75 void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr) 76 { 77 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 78 pmd_t pmdval = pmdp_get(pmd); 79 struct folio *folio; 80 bool young = false; 81 unsigned long pfn; 82 83 if (likely(pmd_present(pmdval))) 84 pfn = pmd_pfn(pmdval); 85 else 86 pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval)); 87 88 folio = damon_get_folio(pfn); 89 if (!folio) 90 return; 91 92 if (likely(pmd_present(pmdval))) 93 young |= pmdp_test_and_clear_young(vma, addr, pmd); 94 young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE); 95 if (young) 96 folio_set_young(folio); 97 98 folio_set_idle(folio); 99 folio_put(folio); 100 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 101 } 102 103 #define DAMON_MAX_SUBSCORE (100) 104 #define DAMON_MAX_AGE_IN_LOG (32) 105 106 int damon_hot_score(struct damon_ctx *c, struct damon_region *r, 107 struct damos *s) 108 { 109 int freq_subscore; 110 unsigned int age_in_sec; 111 int age_in_log, age_subscore; 112 unsigned int freq_weight = s->quota.weight_nr_accesses; 113 unsigned int age_weight = s->quota.weight_age; 114 int hotness; 115 116 freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / 117 damon_max_nr_accesses(&c->attrs); 118 119 age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000; 120 if (age_in_sec) 121 age_in_log = min_t(int, ilog2(age_in_sec) + 1, 122 DAMON_MAX_AGE_IN_LOG); 123 else 124 age_in_log = 0; 125 126 127 /* If frequency is 0, higher age means it's colder */ 128 if (freq_subscore == 0) 129 age_in_log *= -1; 130 131 /* 132 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG]. 133 * Scale it to be in [0, 100] and set it as age subscore. 134 */ 135 age_in_log += DAMON_MAX_AGE_IN_LOG; 136 age_subscore = age_in_log * DAMON_MAX_SUBSCORE / 137 DAMON_MAX_AGE_IN_LOG / 2; 138 139 hotness = (freq_weight * freq_subscore + age_weight * age_subscore); 140 if (freq_weight + age_weight) 141 hotness /= freq_weight + age_weight; 142 /* 143 * Transform it to fit in [0, DAMOS_MAX_SCORE] 144 */ 145 hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE; 146 147 return hotness; 148 } 149 150 int damon_cold_score(struct damon_ctx *c, struct damon_region *r, 151 struct damos *s) 152 { 153 int hotness = damon_hot_score(c, r, s); 154 155 /* Return coldness of the region */ 156 return DAMOS_MAX_SCORE - hotness; 157 } 158 159 static bool damon_folio_mkold_one(struct folio *folio, 160 struct vm_area_struct *vma, unsigned long addr, void *arg) 161 { 162 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); 163 164 while (page_vma_mapped_walk(&pvmw)) { 165 addr = pvmw.address; 166 if (pvmw.pte) 167 damon_ptep_mkold(pvmw.pte, vma, addr); 168 else 169 damon_pmdp_mkold(pvmw.pmd, vma, addr); 170 } 171 return true; 172 } 173 174 void damon_folio_mkold(struct folio *folio) 175 { 176 struct rmap_walk_control rwc = { 177 .rmap_one = damon_folio_mkold_one, 178 .anon_lock = folio_lock_anon_vma_read, 179 }; 180 181 if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { 182 folio_set_idle(folio); 183 return; 184 } 185 186 if (!folio_trylock(folio)) 187 return; 188 189 rmap_walk(folio, &rwc); 190 folio_unlock(folio); 191 192 } 193 194 static bool damon_folio_young_one(struct folio *folio, 195 struct vm_area_struct *vma, unsigned long addr, void *arg) 196 { 197 bool *accessed = arg; 198 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); 199 pte_t pte; 200 201 *accessed = false; 202 while (page_vma_mapped_walk(&pvmw)) { 203 addr = pvmw.address; 204 if (pvmw.pte) { 205 pte = ptep_get(pvmw.pte); 206 207 /* 208 * PFN swap PTEs, such as device-exclusive ones, that 209 * actually map pages are "old" from a CPU perspective. 210 * The MMU notifier takes care of any device aspects. 211 */ 212 *accessed = (pte_present(pte) && pte_young(pte)) || 213 !folio_test_idle(folio) || 214 mmu_notifier_test_young(vma->vm_mm, addr); 215 } else { 216 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 217 pmd_t pmd = pmdp_get(pvmw.pmd); 218 219 *accessed = (pmd_present(pmd) && pmd_young(pmd)) || 220 !folio_test_idle(folio) || 221 mmu_notifier_test_young(vma->vm_mm, addr); 222 #else 223 WARN_ON_ONCE(1); 224 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 225 } 226 if (*accessed) { 227 page_vma_mapped_walk_done(&pvmw); 228 break; 229 } 230 } 231 232 /* If accessed, stop walking */ 233 return *accessed == false; 234 } 235 236 bool damon_folio_young(struct folio *folio) 237 { 238 bool accessed = false; 239 struct rmap_walk_control rwc = { 240 .arg = &accessed, 241 .rmap_one = damon_folio_young_one, 242 .anon_lock = folio_lock_anon_vma_read, 243 }; 244 245 if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { 246 if (folio_test_idle(folio)) 247 return false; 248 else 249 return true; 250 } 251 252 if (!folio_trylock(folio)) 253 return false; 254 255 rmap_walk(folio, &rwc); 256 folio_unlock(folio); 257 258 return accessed; 259 } 260 261 bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio) 262 { 263 bool matched = false; 264 struct mem_cgroup *memcg; 265 size_t folio_sz; 266 267 switch (filter->type) { 268 case DAMOS_FILTER_TYPE_ANON: 269 matched = folio_test_anon(folio); 270 break; 271 case DAMOS_FILTER_TYPE_ACTIVE: 272 matched = folio_test_active(folio); 273 break; 274 case DAMOS_FILTER_TYPE_MEMCG: 275 rcu_read_lock(); 276 memcg = folio_memcg_check(folio); 277 if (!memcg) 278 matched = false; 279 else 280 matched = filter->memcg_id == mem_cgroup_id(memcg); 281 rcu_read_unlock(); 282 break; 283 case DAMOS_FILTER_TYPE_YOUNG: 284 matched = damon_folio_young(folio); 285 if (matched) 286 damon_folio_mkold(folio); 287 break; 288 case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: 289 folio_sz = folio_size(folio); 290 matched = filter->sz_range.min <= folio_sz && 291 folio_sz <= filter->sz_range.max; 292 break; 293 case DAMOS_FILTER_TYPE_UNMAPPED: 294 matched = !folio_mapped(folio) || !folio_raw_mapping(folio); 295 break; 296 default: 297 break; 298 } 299 300 return matched == filter->matching; 301 } 302 303 static unsigned int __damon_migrate_folio_list( 304 struct list_head *migrate_folios, struct pglist_data *pgdat, 305 int target_nid) 306 { 307 unsigned int nr_succeeded = 0; 308 struct migration_target_control mtc = { 309 /* 310 * Allocate from 'node', or fail quickly and quietly. 311 * When this happens, 'page' will likely just be discarded 312 * instead of migrated. 313 */ 314 .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | 315 __GFP_NOMEMALLOC | GFP_NOWAIT, 316 .nid = target_nid, 317 }; 318 319 if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE) 320 return 0; 321 322 if (list_empty(migrate_folios)) 323 return 0; 324 325 /* Migration ignores all cpuset and mempolicy settings */ 326 migrate_pages(migrate_folios, alloc_migration_target, NULL, 327 (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON, 328 &nr_succeeded); 329 330 return nr_succeeded; 331 } 332 333 static unsigned int damon_migrate_folio_list(struct list_head *folio_list, 334 struct pglist_data *pgdat, 335 int target_nid) 336 { 337 unsigned int nr_migrated = 0; 338 struct folio *folio; 339 LIST_HEAD(ret_folios); 340 LIST_HEAD(migrate_folios); 341 342 while (!list_empty(folio_list)) { 343 struct folio *folio; 344 345 cond_resched(); 346 347 folio = lru_to_folio(folio_list); 348 list_del(&folio->lru); 349 350 if (!folio_trylock(folio)) 351 goto keep; 352 353 /* Relocate its contents to another node. */ 354 list_add(&folio->lru, &migrate_folios); 355 folio_unlock(folio); 356 continue; 357 keep: 358 list_add(&folio->lru, &ret_folios); 359 } 360 /* 'folio_list' is always empty here */ 361 362 /* Migrate folios selected for migration */ 363 nr_migrated += __damon_migrate_folio_list( 364 &migrate_folios, pgdat, target_nid); 365 /* 366 * Folios that could not be migrated are still in @migrate_folios. Add 367 * those back on @folio_list 368 */ 369 if (!list_empty(&migrate_folios)) 370 list_splice_init(&migrate_folios, folio_list); 371 372 try_to_unmap_flush(); 373 374 list_splice(&ret_folios, folio_list); 375 376 while (!list_empty(folio_list)) { 377 folio = lru_to_folio(folio_list); 378 list_del(&folio->lru); 379 folio_putback_lru(folio); 380 } 381 382 return nr_migrated; 383 } 384 385 unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid) 386 { 387 int nid; 388 unsigned long nr_migrated = 0; 389 LIST_HEAD(node_folio_list); 390 unsigned int noreclaim_flag; 391 392 if (list_empty(folio_list)) 393 return nr_migrated; 394 395 if (target_nid < 0 || target_nid >= MAX_NUMNODES || 396 !node_state(target_nid, N_MEMORY)) 397 return nr_migrated; 398 399 noreclaim_flag = memalloc_noreclaim_save(); 400 401 nid = folio_nid(lru_to_folio(folio_list)); 402 do { 403 struct folio *folio = lru_to_folio(folio_list); 404 405 if (nid == folio_nid(folio)) { 406 list_move(&folio->lru, &node_folio_list); 407 continue; 408 } 409 410 nr_migrated += damon_migrate_folio_list(&node_folio_list, 411 NODE_DATA(nid), 412 target_nid); 413 nid = folio_nid(lru_to_folio(folio_list)); 414 } while (!list_empty(folio_list)); 415 416 nr_migrated += damon_migrate_folio_list(&node_folio_list, 417 NODE_DATA(nid), 418 target_nid); 419 420 memalloc_noreclaim_restore(noreclaim_flag); 421 422 return nr_migrated; 423 } 424 425 bool damos_ops_has_filter(struct damos *s) 426 { 427 struct damos_filter *f; 428 429 damos_for_each_ops_filter(f, s) 430 return true; 431 return false; 432 } 433