// SPDX-License-Identifier: GPL-2.0
/*
 * DAMON Primitives for The Physical Address Space
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon-pa: " fmt

#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/memory-tiers.h>
#include <linux/migrate.h>
#include <linux/mm_inline.h>

#include "../internal.h"
#include "ops-common.h"

/*
 * rmap_walk() callback for damon_folio_mkold().  Marks every PTE/PMD that
 * maps @folio inside @vma as old, so that a later check can tell whether
 * the folio was accessed in between.  Always returns true so that the walk
 * continues over all remaining mappings of the folio.
 */
static bool damon_folio_mkold_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte)
			damon_ptep_mkold(pvmw.pte, vma, addr);
		else
			damon_pmdp_mkold(pvmw.pmd, vma, addr);
	}
	return true;
}

/*
 * Mark all page table entries mapping @folio as old, to prepare an access
 * check.  Folios with no mapping are handled via the page idle flag
 * instead.  This is best effort: if the folio lock is needed but cannot be
 * taken without blocking, the folio is silently skipped.
 */
static void damon_folio_mkold(struct folio *folio)
{
	struct rmap_walk_control rwc = {
		.rmap_one = damon_folio_mkold_one,
		.anon_lock = folio_lock_anon_vma_read,
	};
	bool need_lock;

	/* No page tables to walk; track accesses via the idle flag only. */
	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		folio_set_idle(folio);
		return;
	}

	/* Non-anonymous and KSM folios are locked for the rmap walk. */
	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
	if (need_lock && !folio_trylock(folio))
		return;

	rmap_walk(folio, &rwc);

	if (need_lock)
		folio_unlock(folio);

}

/* Mark the folio backing physical address @paddr as old, if one exists. */
static void damon_pa_mkold(unsigned long paddr)
{
	struct folio *folio = damon_get_folio(PHYS_PFN(paddr));

	if (!folio)
		return;

	damon_folio_mkold(folio);
	folio_put(folio);
}

/*
 * Prepare the next access check of region @r: choose a random physical
 * address within the region as the sampling target and mark it old.
 */
static void __damon_pa_prepare_access_check(struct damon_region *r)
{
	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);

	damon_pa_mkold(r->sampling_addr);
}

/* Prepare access checks of every region of every target of @ctx. */
static void damon_pa_prepare_access_checks(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			__damon_pa_prepare_access_check(r);
	}
}

/*
 * rmap_walk() callback for damon_folio_young().  Sets *arg (a bool) to
 * true if this mapping of @folio shows an access: a young PTE/PMD, a
 * cleared page idle flag, or a positive mmu notifier young test.  Returns
 * false to stop the walk as soon as an access has been found.
 */
static bool damon_folio_young_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	bool *accessed = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	*accessed = false;
	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte) {
			*accessed = pte_young(ptep_get(pvmw.pte)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			*accessed = pmd_young(pmdp_get(pvmw.pmd)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
#else
			/* PMD-mapped folios shouldn't exist without THP. */
			WARN_ON_ONCE(1);
#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
		}
		if (*accessed) {
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
	}

	/* If accessed, stop walking */
	return *accessed == false;
}

/*
 * Return whether @folio has been accessed since it was last marked old.
 * An unmapped folio is reported via its page idle flag alone.  Returns
 * false (treated as not accessed) if a required folio lock cannot be
 * taken without blocking.
 */
static bool damon_folio_young(struct folio *folio)
{
	bool accessed = false;
	struct rmap_walk_control rwc = {
		.arg = &accessed,
		.rmap_one = damon_folio_young_one,
		.anon_lock = folio_lock_anon_vma_read,
	};
	bool need_lock;

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		if (folio_test_idle(folio))
			return false;
		else
			return true;
	}

	/* Non-anonymous and KSM folios are locked for the rmap walk. */
	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
	if (need_lock && !folio_trylock(folio))
		return false;

	rmap_walk(folio, &rwc);

	if (need_lock)
		folio_unlock(folio);

	return accessed;
}

/*
 * Return whether the folio backing physical address @paddr was accessed,
 * and report the folio's size via @folio_sz so that the caller can reuse
 * the result for other addresses within the same folio.
 */
static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz)
{
	struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
	bool accessed;

	if (!folio)
		return false;

	accessed = damon_folio_young(folio);
	*folio_sz = folio_size(folio);
	folio_put(folio);
	return accessed;
}

/*
 * Update the access rate of region @r based on whether its sampling
 * address was accessed.  The result of the last check is cached in static
 * variables, so consecutive regions whose sampling addresses fall into the
 * same folio reuse the result instead of repeating the rmap walk.
 *
 * NOTE(review): the static cache is shared by every caller of this
 * function; this presumably relies on access checks being serialized in a
 * single monitoring thread — confirm for setups running multiple DAMON
 * contexts with the paddr operations set concurrently.
 */
static void __damon_pa_check_access(struct damon_region *r,
		struct damon_attrs *attrs)
{
	static unsigned long last_addr;
	static unsigned long last_folio_sz = PAGE_SIZE;
	static bool last_accessed;

	/* If the region is in the last checked page, reuse the result */
	if (ALIGN_DOWN(last_addr, last_folio_sz) ==
				ALIGN_DOWN(r->sampling_addr, last_folio_sz)) {
		damon_update_region_access_rate(r, last_accessed, attrs);
		return;
	}

	last_accessed = damon_pa_young(r->sampling_addr, &last_folio_sz);
	damon_update_region_access_rate(r, last_accessed, attrs);

	last_addr = r->sampling_addr;
}

/*
 * Check accesses of every region of every target of @ctx, and return the
 * largest nr_accesses observed among the regions.
 */
static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned int max_nr_accesses = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t) {
			__damon_pa_check_access(r, &ctx->attrs);
			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
		}
	}

	return max_nr_accesses;
}

/*
 * Return whether @folio matches the condition of @filter, compared against
 * the filter's 'matching' setting.  A YOUNG-type filter additionally marks
 * matched folios old again, so that each evaluation reflects accesses made
 * since the previous one.  Unknown filter types never match.
 */
static bool damos_pa_filter_match(struct damos_filter *filter,
		struct folio *folio)
{
	bool matched = false;
	struct mem_cgroup *memcg;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_ANON:
		matched = folio_test_anon(folio);
		break;
	case DAMOS_FILTER_TYPE_MEMCG:
		rcu_read_lock();
		memcg = folio_memcg_check(folio);
		if (!memcg)
			matched = false;
		else
			matched = filter->memcg_id == mem_cgroup_id(memcg);
		rcu_read_unlock();
		break;
	case DAMOS_FILTER_TYPE_YOUNG:
		matched = damon_folio_young(folio);
		if (matched)
			damon_folio_mkold(folio);
		break;
	default:
		break;
	}

	return matched == filter->matching;
}

/*
 * damos_pa_filter_out - Return true if the page should be filtered out.
 *
 * The first filter of @scheme that matches @folio decides, via its 'allow'
 * setting.  Folios matching no filter, and all folios when the core layer
 * already allowed the region, are not filtered out.
 */
static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
{
	if (scheme->core_filters_allowed)
		return false;

	struct damos_filter *filter;

	damos_for_each_filter(filter, scheme) {
		if (damos_pa_filter_match(filter, folio))
			return !filter->allow;
	}
	return false;
}

/*
 * Apply the DAMOS_PAGEOUT action to region @r of scheme @s: isolate the
 * region's filter-passing folios from the LRU and reclaim them.  Unless
 * @s already has a YOUNG-type filter, a temporary one is installed so the
 * access is re-checked at page granularity before paging out.  Bytes of
 * folios passing the filters are accounted into @sz_filter_passed.
 * Returns the number of bytes reported reclaimed by reclaim_pages().
 */
static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	unsigned long addr, applied;
	LIST_HEAD(folio_list);
	bool install_young_filter = true;
	struct damos_filter *filter;

	/* check access in page level again by default */
	damos_for_each_filter(filter, s) {
		if (filter->type == DAMOS_FILTER_TYPE_YOUNG) {
			install_young_filter = false;
			break;
		}
	}
	if (install_young_filter) {
		filter = damos_new_filter(
				DAMOS_FILTER_TYPE_YOUNG, true, false);
		if (!filter)
			return 0;
		damos_add_filter(s, filter);
	}

	for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
		struct folio *folio = damon_get_folio(PHYS_PFN(addr));

		if (!folio)
			continue;

		if (damos_pa_filter_out(s, folio))
			goto put_folio;
		else
			*sz_filter_passed += folio_size(folio);

		/* Drop reuse hints so reclaim does not skip the folio. */
		folio_clear_referenced(folio);
		folio_test_clear_young(folio);
		if (!folio_isolate_lru(folio))
			goto put_folio;
		if (folio_test_unevictable(folio))
			folio_putback_lru(folio);
		else
			list_add(&folio->lru, &folio_list);
put_folio:
		folio_put(folio);
	}
	/* Remove the temporary filter before reclaiming. */
	if (install_young_filter)
		damos_destroy_filter(filter);
	applied = reclaim_pages(&folio_list);
	cond_resched();
	return applied * PAGE_SIZE;
}

/*
 * Common implementation of DAMOS_LRU_PRIO and DAMOS_LRU_DEPRIO: for each
 * filter-passing folio of region @r, either mark it accessed (when
 * @mark_accessed is true) or deactivate it.  Bytes of folios passing the
 * filters are accounted into @sz_filter_passed.  Returns the number of
 * bytes the action was applied to.
 */
static inline unsigned long damon_pa_mark_accessed_or_deactivate(
		struct damon_region *r, struct damos *s, bool mark_accessed,
		unsigned long *sz_filter_passed)
{
	unsigned long addr, applied = 0;

	for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
		struct folio *folio = damon_get_folio(PHYS_PFN(addr));

		if (!folio)
			continue;

		if (damos_pa_filter_out(s, folio))
			goto put_folio;
		else
			*sz_filter_passed += folio_size(folio);

		if (mark_accessed)
			folio_mark_accessed(folio);
		else
			folio_deactivate(folio);
		applied += folio_nr_pages(folio);
put_folio:
		folio_put(folio);
	}
	return applied * PAGE_SIZE;
}

/* Apply the DAMOS_LRU_PRIO action: mark the region's folios accessed. */
static unsigned long damon_pa_mark_accessed(struct damon_region *r,
	struct damos *s, unsigned long *sz_filter_passed)
{
	return damon_pa_mark_accessed_or_deactivate(r, s, true,
			sz_filter_passed);
}

/* Apply the DAMOS_LRU_DEPRIO action: deactivate the region's folios. */
static unsigned long damon_pa_deactivate_pages(struct damon_region *r,
	struct damos *s, unsigned long *sz_filter_passed)
{
	return damon_pa_mark_accessed_or_deactivate(r, s, false,
			sz_filter_passed);
}

/*
 * Migrate the folios on @migrate_folios from @pgdat to @target_nid,
 * asynchronously and without entering reclaim.  Does nothing if the list
 * is empty, @target_nid is invalid, or the folios are already on the
 * target node.  Returns the success count reported by migrate_pages().
 */
static unsigned int __damon_pa_migrate_folio_list(
		struct list_head *migrate_folios, struct pglist_data *pgdat,
		int target_nid)
{
	unsigned int nr_succeeded = 0;
	nodemask_t allowed_mask = NODE_MASK_NONE;
	struct migration_target_control mtc = {
		/*
		 * Allocate from 'node', or fail quickly and quietly.
		 * When this happens, 'page' will likely just be discarded
		 * instead of migrated.
		 */
		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
			__GFP_NOWARN | __GFP_NOMEMALLOC | GFP_NOWAIT,
		.nid = target_nid,
		.nmask = &allowed_mask
	};

	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
		return 0;

	if (list_empty(migrate_folios))
		return 0;

	/* Migration ignores all cpuset and mempolicy settings */
	migrate_pages(migrate_folios, alloc_migrate_folio, NULL,
		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
		      &nr_succeeded);

	return nr_succeeded;
}

/*
 * Try to migrate every folio on @folio_list to @target_nid.  Folios that
 * cannot be locked without blocking are skipped, and all folios — whether
 * migration succeeded, failed, or was skipped — end up back on the LRU.
 * Returns the number of migrations reported successful.
 */
static unsigned int damon_pa_migrate_folio_list(struct list_head *folio_list,
						struct pglist_data *pgdat,
						int target_nid)
{
	unsigned int nr_migrated = 0;
	struct folio *folio;
	LIST_HEAD(ret_folios);
	LIST_HEAD(migrate_folios);

	while (!list_empty(folio_list)) {
		struct folio *folio;

		cond_resched();

		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);

		if (!folio_trylock(folio))
			goto keep;

		/* Relocate its contents to another node. */
		list_add(&folio->lru, &migrate_folios);
		folio_unlock(folio);
		continue;
keep:
		list_add(&folio->lru, &ret_folios);
	}
	/* 'folio_list' is always empty here */

	/* Migrate folios selected for migration */
	nr_migrated += __damon_pa_migrate_folio_list(
			&migrate_folios, pgdat, target_nid);
	/*
	 * Folios that could not be migrated are still in @migrate_folios.
	 * Add those back on @folio_list
	 */
	if (!list_empty(&migrate_folios))
		list_splice_init(&migrate_folios, folio_list);

	try_to_unmap_flush();

	/* Put every remaining folio back on the LRU. */
	list_splice(&ret_folios, folio_list);

	while (!list_empty(folio_list)) {
		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);
		folio_putback_lru(folio);
	}

	return nr_migrated;
}

/*
 * Migrate all folios on @folio_list to @target_nid, batching consecutive
 * folios of the same source node so each batch shares one pgdat.  Runs
 * with reclaim disallowed (memalloc_noreclaim) for the whole operation.
 * Returns the total number of migrations reported successful.
 */
static unsigned long damon_pa_migrate_pages(struct list_head *folio_list,
					    int target_nid)
{
	int nid;
	unsigned long nr_migrated = 0;
	LIST_HEAD(node_folio_list);
	unsigned int noreclaim_flag;

	if (list_empty(folio_list))
		return nr_migrated;

	noreclaim_flag = memalloc_noreclaim_save();

	nid = folio_nid(lru_to_folio(folio_list));
	do {
		struct folio *folio = lru_to_folio(folio_list);

		if (nid == folio_nid(folio)) {
			list_move(&folio->lru, &node_folio_list);
			continue;
		}

		/* Source node changed; flush the batch gathered so far. */
		nr_migrated += damon_pa_migrate_folio_list(&node_folio_list,
							   NODE_DATA(nid),
							   target_nid);
		nid = folio_nid(lru_to_folio(folio_list));
	} while (!list_empty(folio_list));

	/* Flush the final batch. */
	nr_migrated += damon_pa_migrate_folio_list(&node_folio_list,
						   NODE_DATA(nid),
						   target_nid);

	memalloc_noreclaim_restore(noreclaim_flag);

	return nr_migrated;
}

/*
 * Apply the DAMOS_MIGRATE_{HOT,COLD} action to region @r: isolate the
 * region's filter-passing folios and migrate them to @s->target_nid.
 * Bytes of folios passing the filters are accounted into
 * @sz_filter_passed.  Returns the number of migrated bytes.
 */
static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	unsigned long addr, applied;
	LIST_HEAD(folio_list);

	for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
		struct folio *folio = damon_get_folio(PHYS_PFN(addr));

		if (!folio)
			continue;

		if (damos_pa_filter_out(s, folio))
			goto put_folio;
		else
			*sz_filter_passed += folio_size(folio);

		if (!folio_isolate_lru(folio))
			goto put_folio;
		list_add(&folio->lru, &folio_list);
put_folio:
		folio_put(folio);
	}
	applied = damon_pa_migrate_pages(&folio_list, s->target_nid);
	cond_resched();
	return applied * PAGE_SIZE;
}

/* Return whether scheme @s has at least one filter installed. */
static bool damon_pa_scheme_has_filter(struct damos *s)
{
	struct damos_filter *f;

	damos_for_each_filter(f, s)
		return true;
	return false;
}

/*
 * Apply the DAMOS_STAT action to region @r: change nothing, but walk the
 * region folio by folio to account the bytes passing @s's filters into
 * @sz_filter_passed.  Skipped entirely when @s has no filter.  Always
 * returns 0, as no bytes are acted upon.
 */
static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	unsigned long addr;
	LIST_HEAD(folio_list);

	if (!damon_pa_scheme_has_filter(s))
		return 0;

	addr = r->ar.start;
	while (addr < r->ar.end) {
		struct folio *folio = damon_get_folio(PHYS_PFN(addr));

		if (!folio) {
			addr += PAGE_SIZE;
			continue;
		}

		if (!damos_pa_filter_out(s, folio))
			*sz_filter_passed += folio_size(folio);
		/* Advance by the folio size to visit each folio once. */
		addr += folio_size(folio);
		folio_put(folio);
	}
	return 0;
}

/*
 * Dispatch @scheme's action on region @r to its implementation.  Returns
 * the number of bytes the action was applied to, or 0 for unsupported
 * actions.
 */
static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme, unsigned long *sz_filter_passed)
{
	switch (scheme->action) {
	case DAMOS_PAGEOUT:
		return damon_pa_pageout(r, scheme, sz_filter_passed);
	case DAMOS_LRU_PRIO:
		return damon_pa_mark_accessed(r, scheme, sz_filter_passed);
	case DAMOS_LRU_DEPRIO:
		return damon_pa_deactivate_pages(r, scheme, sz_filter_passed);
	case DAMOS_MIGRATE_HOT:
	case DAMOS_MIGRATE_COLD:
		return damon_pa_migrate(r, scheme, sz_filter_passed);
	case DAMOS_STAT:
		return damon_pa_stat(r, scheme, sz_filter_passed);
	default:
		/* DAMOS actions that are not yet supported by 'paddr'. */
		break;
	}
	return 0;
}

/*
 * Return the priority score of region @r for @scheme: coldness for
 * actions targeting cold memory (pageout, LRU deprioritization, migrate
 * cold), hotness for those targeting hot memory (LRU prioritization,
 * migrate hot), and the maximum score for anything else.
 */
static int damon_pa_scheme_score(struct damon_ctx *context,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme)
{
	switch (scheme->action) {
	case DAMOS_PAGEOUT:
		return damon_cold_score(context, r, scheme);
	case DAMOS_LRU_PRIO:
		return damon_hot_score(context, r, scheme);
	case DAMOS_LRU_DEPRIO:
		return damon_cold_score(context, r, scheme);
	case DAMOS_MIGRATE_HOT:
		return damon_hot_score(context, r, scheme);
	case DAMOS_MIGRATE_COLD:
		return damon_cold_score(context, r, scheme);
	default:
		break;
	}

	return DAMOS_MAX_SCORE;
}

/*
 * Register the physical address space operations set with the DAMON core.
 * The paddr variant needs no per-target init/update/cleanup callbacks.
 */
static int __init damon_pa_initcall(void)
{
	struct damon_operations ops = {
		.id = DAMON_OPS_PADDR,
		.init = NULL,
		.update = NULL,
		.prepare_access_checks = damon_pa_prepare_access_checks,
		.check_accesses = damon_pa_check_accesses,
		.reset_aggregated = NULL,
		.target_valid = NULL,
		.cleanup = NULL,
		.apply_scheme = damon_pa_apply_scheme,
		.get_scheme_score = damon_pa_scheme_score,
	};

	return damon_register_ops(&ops);
};

subsys_initcall(damon_pa_initcall);