1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * DAMON Code for Virtual Address Spaces 4 * 5 * Author: SeongJae Park <sj@kernel.org> 6 */ 7 8 #define pr_fmt(fmt) "damon-va: " fmt 9 10 #include <linux/highmem.h> 11 #include <linux/hugetlb.h> 12 #include <linux/mman.h> 13 #include <linux/mmu_notifier.h> 14 #include <linux/page_idle.h> 15 #include <linux/pagewalk.h> 16 #include <linux/sched/mm.h> 17 18 #include "../internal.h" 19 #include "ops-common.h" 20 21 #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST 22 #undef DAMON_MIN_REGION_SZ 23 #define DAMON_MIN_REGION_SZ 1 24 #endif 25 26 /* 27 * 't->pid' should be the pointer to the relevant 'struct pid' having reference 28 * count. Caller must put the returned task, unless it is NULL. 29 */ 30 static inline struct task_struct *damon_get_task_struct(struct damon_target *t) 31 { 32 return get_pid_task(t->pid, PIDTYPE_PID); 33 } 34 35 /* 36 * Get the mm_struct of the given target 37 * 38 * Caller _must_ put the mm_struct after use, unless it is NULL. 39 * 40 * Returns the mm_struct of the target on success, NULL on failure 41 */ 42 static struct mm_struct *damon_get_mm(struct damon_target *t) 43 { 44 struct task_struct *task; 45 struct mm_struct *mm; 46 47 task = damon_get_task_struct(t); 48 if (!task) 49 return NULL; 50 51 mm = get_task_mm(task); 52 put_task_struct(task); 53 return mm; 54 } 55 56 static unsigned long sz_range(struct damon_addr_range *r) 57 { 58 return r->end - r->start; 59 } 60 61 /* 62 * Find three regions separated by two biggest unmapped regions 63 * 64 * vma the head vma of the target address space 65 * regions an array of three address ranges that results will be saved 66 * 67 * This function receives an address space and finds three regions in it which 68 * separated by the two biggest unmapped regions in the space. Please refer to 69 * below comments of '__damon_va_init_regions()' function to know why this is 70 * necessary. 71 * 72 * Returns 0 if success, or negative error code otherwise. 73 */ 74 static int __damon_va_three_regions(struct mm_struct *mm, 75 struct damon_addr_range regions[3]) 76 { 77 struct damon_addr_range first_gap = {0}, second_gap = {0}; 78 VMA_ITERATOR(vmi, mm, 0); 79 struct vm_area_struct *vma, *prev = NULL; 80 unsigned long start; 81 82 /* 83 * Find the two biggest gaps so that first_gap > second_gap > others. 84 * If this is too slow, it can be optimised to examine the maple 85 * tree gaps. 86 */ 87 rcu_read_lock(); 88 for_each_vma(vmi, vma) { 89 unsigned long gap; 90 91 if (!prev) { 92 start = vma->vm_start; 93 goto next; 94 } 95 gap = vma->vm_start - prev->vm_end; 96 97 if (gap > sz_range(&first_gap)) { 98 second_gap = first_gap; 99 first_gap.start = prev->vm_end; 100 first_gap.end = vma->vm_start; 101 } else if (gap > sz_range(&second_gap)) { 102 second_gap.start = prev->vm_end; 103 second_gap.end = vma->vm_start; 104 } 105 next: 106 prev = vma; 107 } 108 rcu_read_unlock(); 109 110 if (!sz_range(&second_gap) || !sz_range(&first_gap)) 111 return -EINVAL; 112 113 /* Sort the two biggest gaps by address */ 114 if (first_gap.start > second_gap.start) 115 swap(first_gap, second_gap); 116 117 /* Store the result */ 118 regions[0].start = ALIGN(start, DAMON_MIN_REGION_SZ); 119 regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION_SZ); 120 regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION_SZ); 121 regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION_SZ); 122 regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION_SZ); 123 regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION_SZ); 124 125 return 0; 126 } 127 128 /* 129 * Get the three regions in the given target (task) 130 * 131 * Returns 0 on success, negative error code otherwise. 132 */ 133 static int damon_va_three_regions(struct damon_target *t, 134 struct damon_addr_range regions[3]) 135 { 136 struct mm_struct *mm; 137 int rc; 138 139 mm = damon_get_mm(t); 140 if (!mm) 141 return -EINVAL; 142 143 mmap_read_lock(mm); 144 rc = __damon_va_three_regions(mm, regions); 145 mmap_read_unlock(mm); 146 147 mmput(mm); 148 return rc; 149 } 150 151 /* 152 * Initialize the monitoring target regions for the given target (task) 153 * 154 * t the given target 155 * 156 * Because only a number of small portions of the entire address space 157 * is actually mapped to the memory and accessed, monitoring the unmapped 158 * regions is wasteful. That said, because we can deal with small noises, 159 * tracking every mapping is not strictly required but could even incur a high 160 * overhead if the mapping frequently changes or the number of mappings is 161 * high. The adaptive regions adjustment mechanism will further help to deal 162 * with the noise by simply identifying the unmapped areas as a region that 163 * has no access. Moreover, applying the real mappings that would have many 164 * unmapped areas inside will make the adaptive mechanism quite complex. That 165 * said, too huge unmapped areas inside the monitoring target should be removed 166 * to not take the time for the adaptive mechanism. 167 * 168 * For the reason, we convert the complex mappings to three distinct regions 169 * that cover every mapped area of the address space. Also the two gaps 170 * between the three regions are the two biggest unmapped areas in the given 171 * address space. In detail, this function first identifies the start and the 172 * end of the mappings and the two biggest unmapped areas of the address space. 173 * Then, it constructs the three regions as below: 174 * 175 * [mappings[0]->start, big_two_unmapped_areas[0]->start) 176 * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) 177 * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) 178 * 179 * As usual memory map of processes is as below, the gap between the heap and 180 * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed 181 * region and the stack will be two biggest unmapped regions. Because these 182 * gaps are exceptionally huge areas in usual address space, excluding these 183 * two biggest unmapped regions will be sufficient to make a trade-off. 184 * 185 * <heap> 186 * <BIG UNMAPPED REGION 1> 187 * <uppermost mmap()-ed region> 188 * (other mmap()-ed regions and small unmapped regions) 189 * <lowermost mmap()-ed region> 190 * <BIG UNMAPPED REGION 2> 191 * <stack> 192 */ 193 static void __damon_va_init_regions(struct damon_ctx *ctx, 194 struct damon_target *t) 195 { 196 struct damon_target *ti; 197 struct damon_addr_range regions[3]; 198 int tidx = 0; 199 200 if (damon_va_three_regions(t, regions)) { 201 damon_for_each_target(ti, ctx) { 202 if (ti == t) 203 break; 204 tidx++; 205 } 206 pr_debug("Failed to get three regions of %dth target\n", tidx); 207 return; 208 } 209 210 damon_set_regions(t, regions, 3, DAMON_MIN_REGION_SZ); 211 } 212 213 /* Initialize '->regions_list' of every target (task) */ 214 static void damon_va_init(struct damon_ctx *ctx) 215 { 216 struct damon_target *t; 217 218 damon_for_each_target(t, ctx) { 219 /* the user may set the target regions as they want */ 220 if (!damon_nr_regions(t)) 221 __damon_va_init_regions(ctx, t); 222 } 223 } 224 225 /* 226 * Update regions for current memory mappings 227 */ 228 static void damon_va_update(struct damon_ctx *ctx) 229 { 230 struct damon_addr_range three_regions[3]; 231 struct damon_target *t; 232 233 damon_for_each_target(t, ctx) { 234 if (damon_va_three_regions(t, three_regions)) 235 continue; 236 damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ); 237 } 238 } 239 240 static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, 241 unsigned long next, struct mm_walk *walk) 242 { 243 pte_t *pte; 244 spinlock_t *ptl; 245 246 ptl = pmd_trans_huge_lock(pmd, walk->vma); 247 if (ptl) { 248 pmd_t pmde = pmdp_get(pmd); 249 250 if (pmd_present(pmde)) 251 damon_pmdp_mkold(pmd, walk->vma, addr); 252 spin_unlock(ptl); 253 return 0; 254 } 255 256 pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 257 if (!pte) 258 return 0; 259 if (!pte_present(ptep_get(pte))) 260 goto out; 261 damon_ptep_mkold(pte, walk->vma, addr); 262 out: 263 pte_unmap_unlock(pte, ptl); 264 return 0; 265 } 266 267 #ifdef CONFIG_HUGETLB_PAGE 268 static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, 269 struct vm_area_struct *vma, unsigned long addr) 270 { 271 bool referenced = false; 272 pte_t entry = huge_ptep_get(mm, addr, pte); 273 struct folio *folio = pfn_folio(pte_pfn(entry)); 274 unsigned long psize = huge_page_size(hstate_vma(vma)); 275 276 folio_get(folio); 277 278 if (pte_young(entry)) { 279 referenced = true; 280 entry = pte_mkold(entry); 281 set_huge_pte_at(mm, addr, pte, entry, psize); 282 } 283 284 if (mmu_notifier_clear_young(mm, addr, 285 addr + huge_page_size(hstate_vma(vma)))) 286 referenced = true; 287 288 if (referenced) 289 folio_set_young(folio); 290 291 folio_set_idle(folio); 292 folio_put(folio); 293 } 294 295 static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, 296 unsigned long addr, unsigned long end, 297 struct mm_walk *walk) 298 { 299 struct hstate *h = hstate_vma(walk->vma); 300 spinlock_t *ptl; 301 pte_t entry; 302 303 ptl = huge_pte_lock(h, walk->mm, pte); 304 entry = huge_ptep_get(walk->mm, addr, pte); 305 if (!pte_present(entry)) 306 goto out; 307 308 damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr); 309 310 out: 311 spin_unlock(ptl); 312 return 0; 313 } 314 #else 315 #define damon_mkold_hugetlb_entry NULL 316 #endif /* CONFIG_HUGETLB_PAGE */ 317 318 static const struct mm_walk_ops damon_mkold_ops = { 319 .pmd_entry = damon_mkold_pmd_entry, 320 .hugetlb_entry = damon_mkold_hugetlb_entry, 321 .walk_lock = PGWALK_RDLOCK, 322 }; 323 324 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) 325 { 326 mmap_read_lock(mm); 327 walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); 328 mmap_read_unlock(mm); 329 } 330 331 /* 332 * Functions for the access checking of the regions 333 */ 334 335 static void __damon_va_prepare_access_check(struct mm_struct *mm, 336 struct damon_region *r, 337 struct damon_ctx *ctx) 338 { 339 r->sampling_addr = damon_rand(ctx, r->ar.start, r->ar.end); 340 341 damon_va_mkold(mm, r->sampling_addr); 342 } 343 344 static void damon_va_prepare_access_checks(struct damon_ctx *ctx) 345 { 346 struct damon_target *t; 347 struct mm_struct *mm; 348 struct damon_region *r; 349 350 damon_for_each_target(t, ctx) { 351 mm = damon_get_mm(t); 352 if (!mm) 353 continue; 354 damon_for_each_region(r, t) 355 __damon_va_prepare_access_check(mm, r, ctx); 356 mmput(mm); 357 } 358 } 359 360 struct damon_young_walk_private { 361 /* size of the folio for the access checked virtual memory address */ 362 unsigned long *folio_sz; 363 bool young; 364 }; 365 366 static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, 367 unsigned long next, struct mm_walk *walk) 368 { 369 pte_t *pte; 370 pte_t ptent; 371 spinlock_t *ptl; 372 struct folio *folio; 373 struct damon_young_walk_private *priv = walk->private; 374 375 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 376 ptl = pmd_trans_huge_lock(pmd, walk->vma); 377 if (ptl) { 378 pmd_t pmde = pmdp_get(pmd); 379 380 if (!pmd_present(pmde)) 381 goto huge_out; 382 folio = vm_normal_folio_pmd(walk->vma, addr, pmde); 383 if (!folio) 384 goto huge_out; 385 if (pmd_young(pmde) || !folio_test_idle(folio) || 386 mmu_notifier_test_young(walk->mm, 387 addr)) 388 priv->young = true; 389 *priv->folio_sz = HPAGE_PMD_SIZE; 390 huge_out: 391 spin_unlock(ptl); 392 return 0; 393 } 394 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 395 396 pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 397 if (!pte) 398 return 0; 399 ptent = ptep_get(pte); 400 if (!pte_present(ptent)) 401 goto out; 402 folio = vm_normal_folio(walk->vma, addr, ptent); 403 if (!folio) 404 goto out; 405 if (pte_young(ptent) || !folio_test_idle(folio) || 406 mmu_notifier_test_young(walk->mm, addr)) 407 priv->young = true; 408 *priv->folio_sz = folio_size(folio); 409 out: 410 pte_unmap_unlock(pte, ptl); 411 return 0; 412 } 413 414 #ifdef CONFIG_HUGETLB_PAGE 415 static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, 416 unsigned long addr, unsigned long end, 417 struct mm_walk *walk) 418 { 419 struct damon_young_walk_private *priv = walk->private; 420 struct hstate *h = hstate_vma(walk->vma); 421 struct folio *folio; 422 spinlock_t *ptl; 423 pte_t entry; 424 425 ptl = huge_pte_lock(h, walk->mm, pte); 426 entry = huge_ptep_get(walk->mm, addr, pte); 427 if (!pte_present(entry)) 428 goto out; 429 430 folio = pfn_folio(pte_pfn(entry)); 431 folio_get(folio); 432 433 if (pte_young(entry) || !folio_test_idle(folio) || 434 mmu_notifier_test_young(walk->mm, addr)) 435 priv->young = true; 436 *priv->folio_sz = huge_page_size(h); 437 438 folio_put(folio); 439 440 out: 441 spin_unlock(ptl); 442 return 0; 443 } 444 #else 445 #define damon_young_hugetlb_entry NULL 446 #endif /* CONFIG_HUGETLB_PAGE */ 447 448 static const struct mm_walk_ops damon_young_ops = { 449 .pmd_entry = damon_young_pmd_entry, 450 .hugetlb_entry = damon_young_hugetlb_entry, 451 .walk_lock = PGWALK_RDLOCK, 452 }; 453 454 static bool damon_va_young(struct mm_struct *mm, unsigned long addr, 455 unsigned long *folio_sz) 456 { 457 struct damon_young_walk_private arg = { 458 .folio_sz = folio_sz, 459 .young = false, 460 }; 461 462 mmap_read_lock(mm); 463 walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); 464 mmap_read_unlock(mm); 465 return arg.young; 466 } 467 468 /* 469 * Check whether the region was accessed after the last preparation 470 * 471 * mm 'mm_struct' for the given virtual address space 472 * r the region to be checked 473 */ 474 static void __damon_va_check_access(struct mm_struct *mm, 475 struct damon_region *r, bool same_target, 476 struct damon_attrs *attrs) 477 { 478 static unsigned long last_addr; 479 static unsigned long last_folio_sz = PAGE_SIZE; 480 static bool last_accessed; 481 482 if (!mm) { 483 damon_update_region_access_rate(r, false, attrs); 484 return; 485 } 486 487 /* If the region is in the last checked page, reuse the result */ 488 if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) == 489 ALIGN_DOWN(r->sampling_addr, last_folio_sz))) { 490 damon_update_region_access_rate(r, last_accessed, attrs); 491 return; 492 } 493 494 last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz); 495 damon_update_region_access_rate(r, last_accessed, attrs); 496 497 last_addr = r->sampling_addr; 498 } 499 500 static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) 501 { 502 struct damon_target *t; 503 struct mm_struct *mm; 504 struct damon_region *r; 505 unsigned int max_nr_accesses = 0; 506 bool same_target; 507 508 damon_for_each_target(t, ctx) { 509 mm = damon_get_mm(t); 510 same_target = false; 511 damon_for_each_region(r, t) { 512 __damon_va_check_access(mm, r, same_target, 513 &ctx->attrs); 514 max_nr_accesses = max(r->nr_accesses, max_nr_accesses); 515 same_target = true; 516 } 517 if (mm) 518 mmput(mm); 519 } 520 521 return max_nr_accesses; 522 } 523 524 static bool damos_va_filter_young_match(struct damos_filter *filter, 525 struct folio *folio, struct vm_area_struct *vma, 526 unsigned long addr, pte_t *ptep, pmd_t *pmdp) 527 { 528 bool young = false; 529 530 if (ptep) 531 young = pte_young(ptep_get(ptep)); 532 else if (pmdp) 533 young = pmd_young(pmdp_get(pmdp)); 534 535 young = young || !folio_test_idle(folio) || 536 mmu_notifier_test_young(vma->vm_mm, addr); 537 538 if (young && ptep) 539 damon_ptep_mkold(ptep, vma, addr); 540 else if (young && pmdp) 541 damon_pmdp_mkold(pmdp, vma, addr); 542 543 return young == filter->matching; 544 } 545 546 static bool damos_va_filter_out(struct damos *scheme, struct folio *folio, 547 struct vm_area_struct *vma, unsigned long addr, 548 pte_t *ptep, pmd_t *pmdp) 549 { 550 struct damos_filter *filter; 551 bool matched; 552 553 if (scheme->core_filters_allowed) 554 return false; 555 556 damos_for_each_ops_filter(filter, scheme) { 557 /* 558 * damos_folio_filter_match checks the young filter by doing an 559 * rmap on the folio to find its page table. However, being the 560 * vaddr scheme, we have direct access to the page tables, so 561 * use that instead. 562 */ 563 if (filter->type == DAMOS_FILTER_TYPE_YOUNG) 564 matched = damos_va_filter_young_match(filter, folio, 565 vma, addr, ptep, pmdp); 566 else 567 matched = damos_folio_filter_match(filter, folio); 568 569 if (matched) 570 return !filter->allow; 571 } 572 return scheme->ops_filters_default_reject; 573 } 574 575 struct damos_va_migrate_private { 576 struct list_head *migration_lists; 577 struct damos *scheme; 578 }; 579 580 /* 581 * Place the given folio in the migration_list corresponding to where the folio 582 * should be migrated. 583 * 584 * The algorithm used here is similar to weighted_interleave_nid() 585 */ 586 static void damos_va_migrate_dests_add(struct folio *folio, 587 struct vm_area_struct *vma, unsigned long addr, 588 struct damos_migrate_dests *dests, 589 struct list_head *migration_lists) 590 { 591 pgoff_t ilx; 592 int order; 593 unsigned int target; 594 unsigned int weight_total = 0; 595 int i; 596 597 /* 598 * If dests is empty, there is only one migration list corresponding 599 * to s->target_nid. 600 */ 601 if (!dests->nr_dests) { 602 i = 0; 603 goto isolate; 604 } 605 606 order = folio_order(folio); 607 ilx = vma->vm_pgoff >> order; 608 ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); 609 610 for (i = 0; i < dests->nr_dests; i++) 611 weight_total += dests->weight_arr[i]; 612 613 /* If the total weights are somehow 0, don't migrate at all */ 614 if (!weight_total) 615 return; 616 617 target = ilx % weight_total; 618 for (i = 0; i < dests->nr_dests; i++) { 619 if (target < dests->weight_arr[i]) 620 break; 621 target -= dests->weight_arr[i]; 622 } 623 624 /* If the folio is already in the right node, don't do anything */ 625 if (folio_nid(folio) == dests->node_id_arr[i]) 626 return; 627 628 isolate: 629 if (!folio_isolate_lru(folio)) 630 return; 631 632 list_add(&folio->lru, &migration_lists[i]); 633 } 634 635 static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr, 636 unsigned long next, struct mm_walk *walk) 637 { 638 struct damos_va_migrate_private *priv = walk->private; 639 struct list_head *migration_lists = priv->migration_lists; 640 struct damos *s = priv->scheme; 641 struct damos_migrate_dests *dests = &s->migrate_dests; 642 struct folio *folio; 643 spinlock_t *ptl; 644 pte_t *start_pte, *pte, ptent; 645 int nr; 646 647 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 648 ptl = pmd_trans_huge_lock(pmd, walk->vma); 649 if (ptl) { 650 pmd_t pmde = pmdp_get(pmd); 651 652 if (!pmd_present(pmde)) 653 goto huge_out; 654 folio = vm_normal_folio_pmd(walk->vma, addr, pmde); 655 if (!folio) 656 goto huge_out; 657 if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd)) 658 goto huge_out; 659 damos_va_migrate_dests_add(folio, walk->vma, addr, dests, 660 migration_lists); 661 huge_out: 662 spin_unlock(ptl); 663 return 0; 664 } 665 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 666 667 start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 668 if (!pte) 669 return 0; 670 671 for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) { 672 nr = 1; 673 ptent = ptep_get(pte); 674 675 if (pte_none(ptent) || !pte_present(ptent)) 676 continue; 677 folio = vm_normal_folio(walk->vma, addr, ptent); 678 if (!folio) 679 continue; 680 if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL)) 681 continue; 682 damos_va_migrate_dests_add(folio, walk->vma, addr, dests, 683 migration_lists); 684 nr = folio_nr_pages(folio); 685 } 686 pte_unmap_unlock(start_pte, ptl); 687 return 0; 688 } 689 690 /* 691 * Functions for the target validity check and cleanup 692 */ 693 694 static bool damon_va_target_valid(struct damon_target *t) 695 { 696 struct task_struct *task; 697 698 task = damon_get_task_struct(t); 699 if (task) { 700 put_task_struct(task); 701 return true; 702 } 703 704 return false; 705 } 706 707 static void damon_va_cleanup_target(struct damon_target *t) 708 { 709 put_pid(t->pid); 710 } 711 712 #ifndef CONFIG_ADVISE_SYSCALLS 713 static unsigned long damos_madvise(struct damon_target *target, 714 struct damon_region *r, int behavior) 715 { 716 return 0; 717 } 718 #else 719 static unsigned long damos_madvise(struct damon_target *target, 720 struct damon_region *r, int behavior) 721 { 722 struct mm_struct *mm; 723 unsigned long start = PAGE_ALIGN(r->ar.start); 724 unsigned long len = PAGE_ALIGN(damon_sz_region(r)); 725 unsigned long applied; 726 727 mm = damon_get_mm(target); 728 if (!mm) 729 return 0; 730 731 applied = do_madvise(mm, start, len, behavior) ? 0 : len; 732 mmput(mm); 733 734 return applied; 735 } 736 #endif /* CONFIG_ADVISE_SYSCALLS */ 737 738 static unsigned long damos_va_migrate(struct damon_target *target, 739 struct damon_region *r, struct damos *s, 740 unsigned long *sz_filter_passed) 741 { 742 LIST_HEAD(folio_list); 743 struct damos_va_migrate_private priv; 744 struct mm_struct *mm; 745 int nr_dests; 746 int nid; 747 bool use_target_nid; 748 unsigned long applied = 0; 749 struct damos_migrate_dests *dests = &s->migrate_dests; 750 struct mm_walk_ops walk_ops = { 751 .pmd_entry = damos_va_migrate_pmd_entry, 752 .pte_entry = NULL, 753 .walk_lock = PGWALK_RDLOCK, 754 }; 755 756 use_target_nid = dests->nr_dests == 0; 757 nr_dests = use_target_nid ? 1 : dests->nr_dests; 758 priv.scheme = s; 759 priv.migration_lists = kmalloc_objs(*priv.migration_lists, nr_dests); 760 if (!priv.migration_lists) 761 return 0; 762 763 for (int i = 0; i < nr_dests; i++) 764 INIT_LIST_HEAD(&priv.migration_lists[i]); 765 766 767 mm = damon_get_mm(target); 768 if (!mm) 769 goto free_lists; 770 771 mmap_read_lock(mm); 772 walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv); 773 mmap_read_unlock(mm); 774 mmput(mm); 775 776 for (int i = 0; i < nr_dests; i++) { 777 nid = use_target_nid ? s->target_nid : dests->node_id_arr[i]; 778 applied += damon_migrate_pages(&priv.migration_lists[i], nid); 779 cond_resched(); 780 } 781 782 free_lists: 783 kfree(priv.migration_lists); 784 return applied * PAGE_SIZE; 785 } 786 787 struct damos_va_stat_private { 788 struct damos *scheme; 789 unsigned long *sz_filter_passed; 790 }; 791 792 static inline bool damos_va_invalid_folio(struct folio *folio, 793 struct damos *s) 794 { 795 return !folio || folio == s->last_applied; 796 } 797 798 static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr, 799 unsigned long next, struct mm_walk *walk) 800 { 801 struct damos_va_stat_private *priv = walk->private; 802 struct damos *s = priv->scheme; 803 unsigned long *sz_filter_passed = priv->sz_filter_passed; 804 struct vm_area_struct *vma = walk->vma; 805 struct folio *folio; 806 spinlock_t *ptl; 807 pte_t *start_pte, *pte, ptent; 808 int nr; 809 810 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 811 ptl = pmd_trans_huge_lock(pmd, vma); 812 if (ptl) { 813 pmd_t pmde = pmdp_get(pmd); 814 815 if (!pmd_present(pmde)) 816 goto huge_unlock; 817 818 folio = vm_normal_folio_pmd(vma, addr, pmde); 819 820 if (damos_va_invalid_folio(folio, s)) 821 goto huge_unlock; 822 823 if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd)) 824 *sz_filter_passed += folio_size(folio); 825 s->last_applied = folio; 826 827 huge_unlock: 828 spin_unlock(ptl); 829 return 0; 830 } 831 #endif 832 start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 833 if (!start_pte) 834 return 0; 835 836 for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) { 837 nr = 1; 838 ptent = ptep_get(pte); 839 840 if (pte_none(ptent) || !pte_present(ptent)) 841 continue; 842 843 folio = vm_normal_folio(vma, addr, ptent); 844 845 if (damos_va_invalid_folio(folio, s)) 846 continue; 847 848 if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL)) 849 *sz_filter_passed += folio_size(folio); 850 nr = folio_nr_pages(folio); 851 s->last_applied = folio; 852 } 853 pte_unmap_unlock(start_pte, ptl); 854 return 0; 855 } 856 857 static unsigned long damos_va_stat(struct damon_target *target, 858 struct damon_region *r, struct damos *s, 859 unsigned long *sz_filter_passed) 860 { 861 struct damos_va_stat_private priv; 862 struct mm_struct *mm; 863 struct mm_walk_ops walk_ops = { 864 .pmd_entry = damos_va_stat_pmd_entry, 865 .walk_lock = PGWALK_RDLOCK, 866 }; 867 868 priv.scheme = s; 869 priv.sz_filter_passed = sz_filter_passed; 870 871 if (!damos_ops_has_filter(s)) 872 return 0; 873 874 mm = damon_get_mm(target); 875 if (!mm) 876 return 0; 877 878 mmap_read_lock(mm); 879 walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv); 880 mmap_read_unlock(mm); 881 mmput(mm); 882 return 0; 883 } 884 885 static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, 886 struct damon_target *t, struct damon_region *r, 887 struct damos *scheme, unsigned long *sz_filter_passed) 888 { 889 int madv_action; 890 891 switch (scheme->action) { 892 case DAMOS_WILLNEED: 893 madv_action = MADV_WILLNEED; 894 break; 895 case DAMOS_COLD: 896 madv_action = MADV_COLD; 897 break; 898 case DAMOS_PAGEOUT: 899 madv_action = MADV_PAGEOUT; 900 break; 901 case DAMOS_HUGEPAGE: 902 madv_action = MADV_HUGEPAGE; 903 break; 904 case DAMOS_NOHUGEPAGE: 905 madv_action = MADV_NOHUGEPAGE; 906 break; 907 case DAMOS_COLLAPSE: 908 madv_action = MADV_COLLAPSE; 909 break; 910 case DAMOS_MIGRATE_HOT: 911 case DAMOS_MIGRATE_COLD: 912 return damos_va_migrate(t, r, scheme, sz_filter_passed); 913 case DAMOS_STAT: 914 return damos_va_stat(t, r, scheme, sz_filter_passed); 915 default: 916 /* 917 * DAMOS actions that are not yet supported by 'vaddr'. 918 */ 919 return 0; 920 } 921 922 return damos_madvise(t, r, madv_action); 923 } 924 925 static int damon_va_scheme_score(struct damon_ctx *context, 926 struct damon_region *r, struct damos *scheme) 927 { 928 929 switch (scheme->action) { 930 case DAMOS_PAGEOUT: 931 return damon_cold_score(context, r, scheme); 932 case DAMOS_MIGRATE_HOT: 933 return damon_hot_score(context, r, scheme); 934 case DAMOS_MIGRATE_COLD: 935 return damon_cold_score(context, r, scheme); 936 default: 937 break; 938 } 939 940 return DAMOS_MAX_SCORE; 941 } 942 943 static int __init damon_va_initcall(void) 944 { 945 struct damon_operations ops = { 946 .id = DAMON_OPS_VADDR, 947 .init = damon_va_init, 948 .update = damon_va_update, 949 .prepare_access_checks = damon_va_prepare_access_checks, 950 .check_accesses = damon_va_check_accesses, 951 .target_valid = damon_va_target_valid, 952 .cleanup_target = damon_va_cleanup_target, 953 .apply_scheme = damon_va_apply_scheme, 954 .get_scheme_score = damon_va_scheme_score, 955 }; 956 /* ops for fixed virtual address ranges */ 957 struct damon_operations ops_fvaddr = ops; 958 int err; 959 960 /* Don't set the monitoring target regions for the entire mapping */ 961 ops_fvaddr.id = DAMON_OPS_FVADDR; 962 ops_fvaddr.init = NULL; 963 ops_fvaddr.update = NULL; 964 965 err = damon_register_ops(&ops); 966 if (err) 967 return err; 968 return damon_register_ops(&ops_fvaddr); 969 }; 970 971 subsys_initcall(damon_va_initcall); 972 973 #include "tests/vaddr-kunit.h" 974