// SPDX-License-Identifier: GPL-2.0
/*
 * Common Code for Data Access Monitoring
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#include <linux/migrate.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "../internal.h"
#include "ops-common.h"

/*
 * Get an online page for a pfn if it's in the LRU list.  Otherwise, returns
 * NULL.
 *
 * The body of this function is stolen from the 'page_idle_get_folio()'.  We
 * steal rather than reuse it because the code is quite simple.
 */
struct folio *damon_get_folio(unsigned long pfn)
{
	struct page *page = pfn_to_online_page(pfn);
	struct folio *folio;

	if (!page)
		return NULL;

	folio = page_folio(page);
	if (!folio_test_lru(folio) || !folio_try_get(folio))
		return NULL;
	if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
		folio_put(folio);
		folio = NULL;
	}
	return folio;
}

void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
{
	pte_t pteval = ptep_get(pte);
	struct folio *folio;
	bool young = false;
	unsigned long pfn;

	if (likely(pte_present(pteval)))
		pfn = pte_pfn(pteval);
	else
		pfn = swp_offset_pfn(pte_to_swp_entry(pteval));

	folio = damon_get_folio(pfn);
	if (!folio)
		return;

	/*
	 * PFN swap PTEs, such as device-exclusive ones, that actually map pages
	 * are "old" from a CPU perspective. The MMU notifier takes care of any
	 * device aspects.
	 */
	if (likely(pte_present(pteval)))
		young |= ptep_test_and_clear_young(vma, addr, pte);
	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
	if (young)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}

void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct folio *folio = damon_get_folio(pmd_pfn(pmdp_get(pmd)));

	if (!folio)
		return;

	if (pmdp_clear_young_notify(vma, addr, pmd))
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}

#define DAMON_MAX_SUBSCORE	(100)
#define DAMON_MAX_AGE_IN_LOG	(32)

int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int freq_subscore;
	unsigned int age_in_sec;
	int age_in_log, age_subscore;
	unsigned int freq_weight = s->quota.weight_nr_accesses;
	unsigned int age_weight = s->quota.weight_age;
	int hotness;

	freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
		damon_max_nr_accesses(&c->attrs);

	age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
	for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
			age_in_log++, age_in_sec >>= 1)
		;

	/* If frequency is 0, higher age means it's colder */
	if (freq_subscore == 0)
		age_in_log *= -1;

	/*
	 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
	 * Scale it to be in [0, 100] and set it as age subscore.
	 */
	age_in_log += DAMON_MAX_AGE_IN_LOG;
	age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
		DAMON_MAX_AGE_IN_LOG / 2;

	hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
	if (freq_weight + age_weight)
		hotness /= freq_weight + age_weight;
	/*
	 * Transform it to fit in [0, DAMOS_MAX_SCORE]
	 */
	hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;

	return hotness;
}

int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int hotness = damon_hot_score(c, r, s);

	/* Return coldness of the region */
	return DAMOS_MAX_SCORE - hotness;
}

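/*
 * Illustrative walk-through of the score computation above; the numbers are
 * editorial examples, not values taken from this file.  Assume a region whose
 * nr_accesses is half of the theoretical maximum, whose age amounts to about
 * eight seconds, and a scheme with equal frequency and age weights:
 *
 *	freq_subscore	= 50			(half of DAMON_MAX_SUBSCORE)
 *	age_in_sec	= 8  ->  age_in_log = 4	(four right-shifts reach zero)
 *	age_in_log	+= 32  ->  36
 *	age_subscore	= 36 * 100 / 32 / 2 = 56
 *	hotness		= (50 + 56) / 2 = 53
 *	return value	= 53 * DAMOS_MAX_SCORE / 100
 *
 * damon_cold_score() then reports DAMOS_MAX_SCORE minus that value.
 */
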
static bool damon_folio_mkold_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte)
			damon_ptep_mkold(pvmw.pte, vma, addr);
		else
			damon_pmdp_mkold(pvmw.pmd, vma, addr);
	}
	return true;
}

void damon_folio_mkold(struct folio *folio)
{
	struct rmap_walk_control rwc = {
		.rmap_one = damon_folio_mkold_one,
		.anon_lock = folio_lock_anon_vma_read,
	};
	bool need_lock;

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		folio_set_idle(folio);
		return;
	}

	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
	if (need_lock && !folio_trylock(folio))
		return;

	rmap_walk(folio, &rwc);

	if (need_lock)
		folio_unlock(folio);
}

static bool damon_folio_young_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	bool *accessed = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
	pte_t pte;

	*accessed = false;
	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte) {
			pte = ptep_get(pvmw.pte);

			/*
			 * PFN swap PTEs, such as device-exclusive ones, that
			 * actually map pages are "old" from a CPU perspective.
			 * The MMU notifier takes care of any device aspects.
			 */
			*accessed = (pte_present(pte) && pte_young(pte)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			*accessed = pmd_young(pmdp_get(pvmw.pmd)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
#else
			WARN_ON_ONCE(1);
#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
		}
		if (*accessed) {
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
	}

	/* If accessed, stop walking */
	return *accessed == false;
}

bool damon_folio_young(struct folio *folio)
{
	bool accessed = false;
	struct rmap_walk_control rwc = {
		.arg = &accessed,
		.rmap_one = damon_folio_young_one,
		.anon_lock = folio_lock_anon_vma_read,
	};
	bool need_lock;

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		if (folio_test_idle(folio))
			return false;
		else
			return true;
	}

	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
	if (need_lock && !folio_trylock(folio))
		return false;

	rmap_walk(folio, &rwc);

	if (need_lock)
		folio_unlock(folio);

	return accessed;
}

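/*
 * A minimal usage sketch for the two helpers above (editorial example, not
 * code from this file): access-sampling callers check whether the folio was
 * referenced and, if so, reset the state so the next check covers a fresh
 * interval, mirroring the DAMOS_FILTER_TYPE_YOUNG handling below:
 *
 *	young = damon_folio_young(folio);
 *	if (young)
 *		damon_folio_mkold(folio);
 *
 * 'young' and 'folio' are placeholders.  Skipping the mkold step would leave
 * the PTE young bits set and the page idle bit clear, so later
 * damon_folio_young() calls would keep reporting the same old access.
 */
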
bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio)
{
	bool matched = false;
	struct mem_cgroup *memcg;
	size_t folio_sz;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_ANON:
		matched = folio_test_anon(folio);
		break;
	case DAMOS_FILTER_TYPE_ACTIVE:
		matched = folio_test_active(folio);
		break;
	case DAMOS_FILTER_TYPE_MEMCG:
		rcu_read_lock();
		memcg = folio_memcg_check(folio);
		if (!memcg)
			matched = false;
		else
			matched = filter->memcg_id == mem_cgroup_id(memcg);
		rcu_read_unlock();
		break;
	case DAMOS_FILTER_TYPE_YOUNG:
		matched = damon_folio_young(folio);
		if (matched)
			damon_folio_mkold(folio);
		break;
	case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
		folio_sz = folio_size(folio);
		matched = filter->sz_range.min <= folio_sz &&
			  folio_sz <= filter->sz_range.max;
		break;
	case DAMOS_FILTER_TYPE_UNMAPPED:
		matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
		break;
	default:
		break;
	}

	return matched == filter->matching;
}

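/*
 * Illustrative filter setup for the matcher above (a hypothetical, editorial
 * sketch; real users allocate filters through the DAMON core API rather than
 * building them on the stack).  Only the fields this function consumes are
 * shown, here for matching folios of 2MiB or larger:
 *
 *	struct damos_filter filter = {
 *		.type = DAMOS_FILTER_TYPE_HUGEPAGE_SIZE,
 *		.matching = true,
 *		.sz_range = { .min = SZ_2M, .max = ULONG_MAX },
 *	};
 *
 *	matched = damos_folio_filter_match(&filter, folio);
 *
 * With '.matching = false', the same call would instead report folios whose
 * size falls outside [min, max].
 */
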
static unsigned int __damon_migrate_folio_list(
		struct list_head *migrate_folios, struct pglist_data *pgdat,
		int target_nid)
{
	unsigned int nr_succeeded = 0;
	struct migration_target_control mtc = {
		/*
		 * Allocate from 'node', or fail quickly and quietly.
		 * When this happens, 'page' will likely just be discarded
		 * instead of migrated.
		 */
		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
			__GFP_NOWARN | __GFP_NOMEMALLOC | GFP_NOWAIT,
		.nid = target_nid,
	};

	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
		return 0;

	if (list_empty(migrate_folios))
		return 0;

	/* Migration ignores all cpuset and mempolicy settings */
	migrate_pages(migrate_folios, alloc_migration_target, NULL,
		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
		      &nr_succeeded);

	return nr_succeeded;
}

static unsigned int damon_migrate_folio_list(struct list_head *folio_list,
					     struct pglist_data *pgdat,
					     int target_nid)
{
	unsigned int nr_migrated = 0;
	struct folio *folio;
	LIST_HEAD(ret_folios);
	LIST_HEAD(migrate_folios);

	while (!list_empty(folio_list)) {
		struct folio *folio;

		cond_resched();

		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);

		if (!folio_trylock(folio))
			goto keep;

		/* Relocate its contents to another node. */
		list_add(&folio->lru, &migrate_folios);
		folio_unlock(folio);
		continue;
keep:
		list_add(&folio->lru, &ret_folios);
	}
	/* 'folio_list' is always empty here */

	/* Migrate folios selected for migration */
	nr_migrated += __damon_migrate_folio_list(
			&migrate_folios, pgdat, target_nid);
	/*
	 * Folios that could not be migrated are still in @migrate_folios.
	 * Add those back on @folio_list
	 */
	if (!list_empty(&migrate_folios))
		list_splice_init(&migrate_folios, folio_list);

	try_to_unmap_flush();

	list_splice(&ret_folios, folio_list);

	while (!list_empty(folio_list)) {
		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);
		folio_putback_lru(folio);
	}

	return nr_migrated;
}

unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
{
	int nid;
	unsigned long nr_migrated = 0;
	LIST_HEAD(node_folio_list);
	unsigned int noreclaim_flag;

	if (list_empty(folio_list))
		return nr_migrated;

	if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
	    !node_state(target_nid, N_MEMORY))
		return nr_migrated;

	noreclaim_flag = memalloc_noreclaim_save();

	nid = folio_nid(lru_to_folio(folio_list));
	do {
		struct folio *folio = lru_to_folio(folio_list);

		if (nid == folio_nid(folio)) {
			list_move(&folio->lru, &node_folio_list);
			continue;
		}

		nr_migrated += damon_migrate_folio_list(&node_folio_list,
							NODE_DATA(nid),
							target_nid);
		nid = folio_nid(lru_to_folio(folio_list));
	} while (!list_empty(folio_list));

	nr_migrated += damon_migrate_folio_list(&node_folio_list,
						NODE_DATA(nid),
						target_nid);

	memalloc_noreclaim_restore(noreclaim_flag);

	return nr_migrated;
}

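/*
 * Minimal usage sketch for damon_migrate_pages() (a hypothetical caller, not
 * part of this file).  The caller hands over folios it has isolated from the
 * LRU, linked through folio->lru; folios that cannot be migrated are put back
 * by damon_migrate_folio_list() above:
 *
 *	LIST_HEAD(folio_list);
 *
 *	if (folio_isolate_lru(folio))
 *		list_add(&folio->lru, &folio_list);
 *	nr_moved = damon_migrate_pages(&folio_list, target_nid);
 *
 * 'folio', 'target_nid' and 'nr_moved' are placeholders, and the caller is
 * assumed to already hold a reference on the folio.  If target_nid does not
 * name an online node with memory, the call returns 0 without touching the
 * list.
 */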