1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Common Code for Data Access Monitoring 4 * 5 * Author: SeongJae Park <sj@kernel.org> 6 */ 7 8 #include <linux/migrate.h> 9 #include <linux/mmu_notifier.h> 10 #include <linux/page_idle.h> 11 #include <linux/pagemap.h> 12 #include <linux/rmap.h> 13 #include <linux/swap.h> 14 #include <linux/swapops.h> 15 16 #include "../internal.h" 17 #include "ops-common.h" 18 19 /* 20 * Get an online page for a pfn if it's in the LRU list. Otherwise, returns 21 * NULL. 22 * 23 * The body of this function is stolen from the 'page_idle_get_folio()'. We 24 * steal rather than reuse it because the code is quite simple. 25 */ 26 struct folio *damon_get_folio(unsigned long pfn) 27 { 28 struct page *page = pfn_to_online_page(pfn); 29 struct folio *folio; 30 31 if (!page) 32 return NULL; 33 34 folio = page_folio(page); 35 if (!folio_test_lru(folio) || !folio_try_get(folio)) 36 return NULL; 37 if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) { 38 folio_put(folio); 39 folio = NULL; 40 } 41 return folio; 42 } 43 44 void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr) 45 { 46 pte_t pteval = ptep_get(pte); 47 struct folio *folio; 48 bool young = false; 49 unsigned long pfn; 50 51 if (likely(pte_present(pteval))) 52 pfn = pte_pfn(pteval); 53 else 54 pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); 55 56 folio = damon_get_folio(pfn); 57 if (!folio) 58 return; 59 60 /* 61 * PFN swap PTEs, such as device-exclusive ones, that actually map pages 62 * are "old" from a CPU perspective. The MMU notifier takes care of any 63 * device aspects. 64 */ 65 if (likely(pte_present(pteval))) 66 young |= ptep_test_and_clear_young(vma, addr, pte); 67 young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE); 68 if (young) 69 folio_set_young(folio); 70 71 folio_set_idle(folio); 72 folio_put(folio); 73 } 74 75 void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr) 76 { 77 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 78 struct folio *folio = damon_get_folio(pmd_pfn(pmdp_get(pmd))); 79 80 if (!folio) 81 return; 82 83 if (pmdp_clear_young_notify(vma, addr, pmd)) 84 folio_set_young(folio); 85 86 folio_set_idle(folio); 87 folio_put(folio); 88 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 89 } 90 91 #define DAMON_MAX_SUBSCORE (100) 92 #define DAMON_MAX_AGE_IN_LOG (32) 93 94 int damon_hot_score(struct damon_ctx *c, struct damon_region *r, 95 struct damos *s) 96 { 97 int freq_subscore; 98 unsigned int age_in_sec; 99 int age_in_log, age_subscore; 100 unsigned int freq_weight = s->quota.weight_nr_accesses; 101 unsigned int age_weight = s->quota.weight_age; 102 int hotness; 103 104 freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / 105 damon_max_nr_accesses(&c->attrs); 106 107 age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000; 108 for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec; 109 age_in_log++, age_in_sec >>= 1) 110 ; 111 112 /* If frequency is 0, higher age means it's colder */ 113 if (freq_subscore == 0) 114 age_in_log *= -1; 115 116 /* 117 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG]. 118 * Scale it to be in [0, 100] and set it as age subscore. 119 */ 120 age_in_log += DAMON_MAX_AGE_IN_LOG; 121 age_subscore = age_in_log * DAMON_MAX_SUBSCORE / 122 DAMON_MAX_AGE_IN_LOG / 2; 123 124 hotness = (freq_weight * freq_subscore + age_weight * age_subscore); 125 if (freq_weight + age_weight) 126 hotness /= freq_weight + age_weight; 127 /* 128 * Transform it to fit in [0, DAMOS_MAX_SCORE] 129 */ 130 hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE; 131 132 return hotness; 133 } 134 135 int damon_cold_score(struct damon_ctx *c, struct damon_region *r, 136 struct damos *s) 137 { 138 int hotness = damon_hot_score(c, r, s); 139 140 /* Return coldness of the region */ 141 return DAMOS_MAX_SCORE - hotness; 142 } 143 144 static bool damon_folio_mkold_one(struct folio *folio, 145 struct vm_area_struct *vma, unsigned long addr, void *arg) 146 { 147 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); 148 149 while (page_vma_mapped_walk(&pvmw)) { 150 addr = pvmw.address; 151 if (pvmw.pte) 152 damon_ptep_mkold(pvmw.pte, vma, addr); 153 else 154 damon_pmdp_mkold(pvmw.pmd, vma, addr); 155 } 156 return true; 157 } 158 159 void damon_folio_mkold(struct folio *folio) 160 { 161 struct rmap_walk_control rwc = { 162 .rmap_one = damon_folio_mkold_one, 163 .anon_lock = folio_lock_anon_vma_read, 164 }; 165 166 if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { 167 folio_set_idle(folio); 168 return; 169 } 170 171 if (!folio_trylock(folio)) 172 return; 173 174 rmap_walk(folio, &rwc); 175 folio_unlock(folio); 176 177 } 178 179 static bool damon_folio_young_one(struct folio *folio, 180 struct vm_area_struct *vma, unsigned long addr, void *arg) 181 { 182 bool *accessed = arg; 183 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); 184 pte_t pte; 185 186 *accessed = false; 187 while (page_vma_mapped_walk(&pvmw)) { 188 addr = pvmw.address; 189 if (pvmw.pte) { 190 pte = ptep_get(pvmw.pte); 191 192 /* 193 * PFN swap PTEs, such as device-exclusive ones, that 194 * actually map pages are "old" from a CPU perspective. 195 * The MMU notifier takes care of any device aspects. 196 */ 197 *accessed = (pte_present(pte) && pte_young(pte)) || 198 !folio_test_idle(folio) || 199 mmu_notifier_test_young(vma->vm_mm, addr); 200 } else { 201 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 202 *accessed = pmd_young(pmdp_get(pvmw.pmd)) || 203 !folio_test_idle(folio) || 204 mmu_notifier_test_young(vma->vm_mm, addr); 205 #else 206 WARN_ON_ONCE(1); 207 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 208 } 209 if (*accessed) { 210 page_vma_mapped_walk_done(&pvmw); 211 break; 212 } 213 } 214 215 /* If accessed, stop walking */ 216 return *accessed == false; 217 } 218 219 bool damon_folio_young(struct folio *folio) 220 { 221 bool accessed = false; 222 struct rmap_walk_control rwc = { 223 .arg = &accessed, 224 .rmap_one = damon_folio_young_one, 225 .anon_lock = folio_lock_anon_vma_read, 226 }; 227 228 if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { 229 if (folio_test_idle(folio)) 230 return false; 231 else 232 return true; 233 } 234 235 if (!folio_trylock(folio)) 236 return false; 237 238 rmap_walk(folio, &rwc); 239 folio_unlock(folio); 240 241 return accessed; 242 } 243 244 bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio) 245 { 246 bool matched = false; 247 struct mem_cgroup *memcg; 248 size_t folio_sz; 249 250 switch (filter->type) { 251 case DAMOS_FILTER_TYPE_ANON: 252 matched = folio_test_anon(folio); 253 break; 254 case DAMOS_FILTER_TYPE_ACTIVE: 255 matched = folio_test_active(folio); 256 break; 257 case DAMOS_FILTER_TYPE_MEMCG: 258 rcu_read_lock(); 259 memcg = folio_memcg_check(folio); 260 if (!memcg) 261 matched = false; 262 else 263 matched = filter->memcg_id == mem_cgroup_id(memcg); 264 rcu_read_unlock(); 265 break; 266 case DAMOS_FILTER_TYPE_YOUNG: 267 matched = damon_folio_young(folio); 268 if (matched) 269 damon_folio_mkold(folio); 270 break; 271 case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: 272 folio_sz = folio_size(folio); 273 matched = filter->sz_range.min <= folio_sz && 274 folio_sz <= filter->sz_range.max; 275 break; 276 case DAMOS_FILTER_TYPE_UNMAPPED: 277 matched = !folio_mapped(folio) || !folio_raw_mapping(folio); 278 break; 279 default: 280 break; 281 } 282 283 return matched == filter->matching; 284 } 285 286 static unsigned int __damon_migrate_folio_list( 287 struct list_head *migrate_folios, struct pglist_data *pgdat, 288 int target_nid) 289 { 290 unsigned int nr_succeeded = 0; 291 struct migration_target_control mtc = { 292 /* 293 * Allocate from 'node', or fail quickly and quietly. 294 * When this happens, 'page' will likely just be discarded 295 * instead of migrated. 296 */ 297 .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | 298 __GFP_NOMEMALLOC | GFP_NOWAIT, 299 .nid = target_nid, 300 }; 301 302 if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE) 303 return 0; 304 305 if (list_empty(migrate_folios)) 306 return 0; 307 308 /* Migration ignores all cpuset and mempolicy settings */ 309 migrate_pages(migrate_folios, alloc_migration_target, NULL, 310 (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON, 311 &nr_succeeded); 312 313 return nr_succeeded; 314 } 315 316 static unsigned int damon_migrate_folio_list(struct list_head *folio_list, 317 struct pglist_data *pgdat, 318 int target_nid) 319 { 320 unsigned int nr_migrated = 0; 321 struct folio *folio; 322 LIST_HEAD(ret_folios); 323 LIST_HEAD(migrate_folios); 324 325 while (!list_empty(folio_list)) { 326 struct folio *folio; 327 328 cond_resched(); 329 330 folio = lru_to_folio(folio_list); 331 list_del(&folio->lru); 332 333 if (!folio_trylock(folio)) 334 goto keep; 335 336 /* Relocate its contents to another node. */ 337 list_add(&folio->lru, &migrate_folios); 338 folio_unlock(folio); 339 continue; 340 keep: 341 list_add(&folio->lru, &ret_folios); 342 } 343 /* 'folio_list' is always empty here */ 344 345 /* Migrate folios selected for migration */ 346 nr_migrated += __damon_migrate_folio_list( 347 &migrate_folios, pgdat, target_nid); 348 /* 349 * Folios that could not be migrated are still in @migrate_folios. Add 350 * those back on @folio_list 351 */ 352 if (!list_empty(&migrate_folios)) 353 list_splice_init(&migrate_folios, folio_list); 354 355 try_to_unmap_flush(); 356 357 list_splice(&ret_folios, folio_list); 358 359 while (!list_empty(folio_list)) { 360 folio = lru_to_folio(folio_list); 361 list_del(&folio->lru); 362 folio_putback_lru(folio); 363 } 364 365 return nr_migrated; 366 } 367 368 unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid) 369 { 370 int nid; 371 unsigned long nr_migrated = 0; 372 LIST_HEAD(node_folio_list); 373 unsigned int noreclaim_flag; 374 375 if (list_empty(folio_list)) 376 return nr_migrated; 377 378 if (target_nid < 0 || target_nid >= MAX_NUMNODES || 379 !node_state(target_nid, N_MEMORY)) 380 return nr_migrated; 381 382 noreclaim_flag = memalloc_noreclaim_save(); 383 384 nid = folio_nid(lru_to_folio(folio_list)); 385 do { 386 struct folio *folio = lru_to_folio(folio_list); 387 388 if (nid == folio_nid(folio)) { 389 list_move(&folio->lru, &node_folio_list); 390 continue; 391 } 392 393 nr_migrated += damon_migrate_folio_list(&node_folio_list, 394 NODE_DATA(nid), 395 target_nid); 396 nid = folio_nid(lru_to_folio(folio_list)); 397 } while (!list_empty(folio_list)); 398 399 nr_migrated += damon_migrate_folio_list(&node_folio_list, 400 NODE_DATA(nid), 401 target_nid); 402 403 memalloc_noreclaim_restore(noreclaim_flag); 404 405 return nr_migrated; 406 } 407 408 bool damos_ops_has_filter(struct damos *s) 409 { 410 struct damos_filter *f; 411 412 damos_for_each_ops_filter(f, s) 413 return true; 414 return false; 415 } 416