// SPDX-License-Identifier: GPL-2.0
/*
 * DAMON Code for Virtual Address Spaces
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon-va: " fmt

#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>

#include "../internal.h"
#include "ops-common.h"

#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
#undef DAMON_MIN_REGION_SZ
#define DAMON_MIN_REGION_SZ 1
#endif

/*
 * 't->pid' should be the pointer to the relevant 'struct pid' having reference
 * count.  Caller must put the returned task, unless it is NULL.
 */
static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
{
	return get_pid_task(t->pid, PIDTYPE_PID);
}

/*
 * Get the mm_struct of the given target
 *
 * Caller _must_ put the mm_struct after use, unless it is NULL.
 *
 * Returns the mm_struct of the target on success, NULL on failure
 */
static struct mm_struct *damon_get_mm(struct damon_target *t)
{
	struct task_struct *task;
	struct mm_struct *mm;

	task = damon_get_task_struct(t);
	if (!task)
		return NULL;

	mm = get_task_mm(task);
	put_task_struct(task);
	return mm;
}

static unsigned long sz_range(struct damon_addr_range *r)
{
	return r->end - r->start;
}

/*
 * Find three regions separated by the two biggest unmapped regions
 *
 * mm		the mm_struct of the target address space
 * regions	an array of three address ranges in which the results are saved
 *
 * This function receives an address space and finds three regions in it which
 * are separated by the two biggest unmapped regions in the space.  Please
 * refer to the comments of '__damon_va_init_regions()' below to know why this
 * is necessary.
 *
 * Returns 0 if success, or negative error code otherwise.
 */
static int __damon_va_three_regions(struct mm_struct *mm,
		struct damon_addr_range regions[3])
{
	struct damon_addr_range first_gap = {0}, second_gap = {0};
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma, *prev = NULL;
	unsigned long start;

	/*
	 * Find the two biggest gaps so that first_gap > second_gap > others.
	 * If this is too slow, it can be optimised to examine the maple
	 * tree gaps.
	 */
	rcu_read_lock();
	for_each_vma(vmi, vma) {
		unsigned long gap;

		if (!prev) {
			start = vma->vm_start;
			goto next;
		}
		gap = vma->vm_start - prev->vm_end;

		if (gap > sz_range(&first_gap)) {
			second_gap = first_gap;
			first_gap.start = prev->vm_end;
			first_gap.end = vma->vm_start;
		} else if (gap > sz_range(&second_gap)) {
			second_gap.start = prev->vm_end;
			second_gap.end = vma->vm_start;
		}
next:
		prev = vma;
	}
	rcu_read_unlock();

	if (!sz_range(&second_gap) || !sz_range(&first_gap))
		return -EINVAL;

	/* Sort the two biggest gaps by address */
	if (first_gap.start > second_gap.start)
		swap(first_gap, second_gap);

	/* Store the result */
	regions[0].start = ALIGN(start, DAMON_MIN_REGION_SZ);
	regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION_SZ);
	regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION_SZ);
	regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION_SZ);
	regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION_SZ);
	regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION_SZ);

	return 0;
}

/*
 * Get the three regions in the given target (task)
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int damon_va_three_regions(struct damon_target *t,
		struct damon_addr_range regions[3])
{
	struct mm_struct *mm;
	int rc;

	mm = damon_get_mm(t);
	if (!mm)
		return -EINVAL;

	mmap_read_lock(mm);
	rc = __damon_va_three_regions(mm, regions);
	mmap_read_unlock(mm);

	mmput(mm);
	return rc;
}

/*
 * Initialize the monitoring target regions for the given target (task)
 *
 * t	the given target
 *
 * Because only a small number of portions of the entire address space are
 * actually mapped to memory and accessed, monitoring the unmapped regions is
 * wasteful.  That said, because we can deal with small noise, tracking every
 * mapping is not strictly required, and it could even incur a high overhead
 * if the mappings frequently change or the number of mappings is high.  The
 * adaptive regions adjustment mechanism will further help to deal with the
 * noise by simply identifying the unmapped areas as regions that have no
 * access.  Moreover, applying the real mappings, which would have many
 * unmapped areas inside, would make the adaptive mechanism quite complex.
 * That said, excessively huge unmapped areas inside the monitoring target
 * should be removed so that the adaptive mechanism does not waste time on
 * them.
 *
 * For this reason, we convert the complex mappings to three distinct regions
 * that cover every mapped area of the address space.  The two gaps between
 * the three regions are the two biggest unmapped areas in the given address
 * space.  In detail, this function first identifies the start and the end of
 * the mappings and the two biggest unmapped areas of the address space.
 * Then, it constructs the three regions as below:
 *
 *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
 *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
 *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
 *
 * As the usual memory map of processes is as below, the gap between the heap
 * and the uppermost mmap()-ed region, and the gap between the lowermost
 * mmap()-ed region and the stack will be the two biggest unmapped regions.
 * Because these gaps are exceptionally huge in a usual address space,
 * excluding these two biggest unmapped regions is a sufficient trade-off.
 *
 *   <heap>
 *   <BIG UNMAPPED REGION 1>
 *   <uppermost mmap()-ed region>
 *   (other mmap()-ed regions and small unmapped regions)
 *   <lowermost mmap()-ed region>
 *   <BIG UNMAPPED REGION 2>
 *   <stack>
 */
static void __damon_va_init_regions(struct damon_ctx *ctx,
		struct damon_target *t)
{
	struct damon_target *ti;
	struct damon_addr_range regions[3];
	int tidx = 0;

	if (damon_va_three_regions(t, regions)) {
		damon_for_each_target(ti, ctx) {
			if (ti == t)
				break;
			tidx++;
		}
		pr_debug("Failed to get three regions of %dth target\n", tidx);
		return;
	}

	damon_set_regions(t, regions, 3, DAMON_MIN_REGION_SZ);
}

/* Initialize '->regions_list' of every target (task) */
static void damon_va_init(struct damon_ctx *ctx)
{
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
		/* the user may set the target regions as they want */
		if (!damon_nr_regions(t))
			__damon_va_init_regions(ctx, t);
	}
}

/*
 * Update regions for current memory mappings
 */
static void damon_va_update(struct damon_ctx *ctx)
{
	struct damon_addr_range three_regions[3];
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
		if (damon_va_three_regions(t, three_regions))
			continue;
		damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ);
	}
}

static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	pte_t *pte;
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmd, walk->vma);
	if (ptl) {
		pmd_t pmde = pmdp_get(pmd);

		if (pmd_present(pmde))
			damon_pmdp_mkold(pmd, walk->vma, addr);
		spin_unlock(ptl);
		return 0;
	}

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte)
		return 0;
	if (!pte_present(ptep_get(pte)))
		goto out;
	damon_ptep_mkold(pte, walk->vma, addr);
out:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long addr)
{
	bool referenced = false;
	pte_t entry = huge_ptep_get(mm, addr, pte);
	struct folio *folio = pfn_folio(pte_pfn(entry));
	unsigned long psize = huge_page_size(hstate_vma(vma));

	folio_get(folio);

	if (pte_young(entry)) {
		referenced = true;
		entry = pte_mkold(entry);
		set_huge_pte_at(mm, addr, pte, entry, psize);
	}

	if (mmu_notifier_clear_young(mm, addr,
				addr + huge_page_size(hstate_vma(vma))))
		referenced = true;

	if (referenced)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}

static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end,
		struct mm_walk *walk)
{
	struct hstate *h = hstate_vma(walk->vma);
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(h, walk->mm, pte);
	entry = huge_ptep_get(walk->mm, addr, pte);
	if (!pte_present(entry))
		goto out;

	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);

out:
	spin_unlock(ptl);
	return 0;
}
#else
#define damon_mkold_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_mkold_ops = {
	.pmd_entry = damon_mkold_pmd_entry,
	.hugetlb_entry = damon_mkold_hugetlb_entry,
	.walk_lock = PGWALK_RDLOCK,
};

static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
{
	mmap_read_lock(mm);
	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
	mmap_read_unlock(mm);
}

/*
 * Functions for the access checking of the regions
 */

static void __damon_va_prepare_access_check(struct mm_struct *mm,
		struct damon_region *r)
{
	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);

	damon_va_mkold(mm, r->sampling_addr);
}

static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct mm_struct *mm;
	struct damon_region *r;

	damon_for_each_target(t, ctx) {
		mm = damon_get_mm(t);
		if (!mm)
			continue;
		damon_for_each_region(r, t)
			__damon_va_prepare_access_check(mm, r);
		mmput(mm);
	}
}

struct damon_young_walk_private {
	/* size of the folio for the access checked virtual memory address */
	unsigned long *folio_sz;
	bool young;
};

static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	pte_t *pte;
	pte_t ptent;
	spinlock_t *ptl;
	struct folio *folio;
	struct damon_young_walk_private *priv = walk->private;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, walk->vma);
	if (ptl) {
		pmd_t pmde = pmdp_get(pmd);

		if (!pmd_present(pmde))
			goto huge_out;
		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
		if (!folio)
			goto huge_out;
		if (pmd_young(pmde) || !folio_test_idle(folio) ||
		    mmu_notifier_test_young(walk->mm, addr))
			priv->young = true;
		*priv->folio_sz = HPAGE_PMD_SIZE;
huge_out:
		spin_unlock(ptl);
		return 0;
	}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte)
		return 0;
	ptent = ptep_get(pte);
	if (!pte_present(ptent))
		goto out;
	folio = vm_normal_folio(walk->vma, addr, ptent);
	if (!folio)
		goto out;
	if (pte_young(ptent) || !folio_test_idle(folio) ||
	    mmu_notifier_test_young(walk->mm, addr))
		priv->young = true;
	*priv->folio_sz = folio_size(folio);
out:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end,
		struct mm_walk *walk)
{
	struct damon_young_walk_private *priv = walk->private;
	struct hstate *h = hstate_vma(walk->vma);
	struct folio *folio;
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(h, walk->mm, pte);
	entry = huge_ptep_get(walk->mm, addr, pte);
	if (!pte_present(entry))
		goto out;

	folio = pfn_folio(pte_pfn(entry));
	folio_get(folio);

	if (pte_young(entry) || !folio_test_idle(folio) ||
	    mmu_notifier_test_young(walk->mm, addr))
		priv->young = true;
	*priv->folio_sz = huge_page_size(h);

	folio_put(folio);

out:
	spin_unlock(ptl);
	return 0;
}
#else
#define damon_young_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_young_ops = {
	.pmd_entry = damon_young_pmd_entry,
	.hugetlb_entry = damon_young_hugetlb_entry,
	.walk_lock = PGWALK_RDLOCK,
};

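/*
 * Check whether the single sampling address was accessed (is still 'young')
 * since the last damon_va_mkold() on it, using the damon_young_ops page table
 * walk above.  The size of the folio backing the address is saved via
 * @folio_sz, so that the caller can reuse the result for other sampling
 * addresses that fall in the same folio.
 */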
static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
		unsigned long *folio_sz)
{
	struct damon_young_walk_private arg = {
		.folio_sz = folio_sz,
		.young = false,
	};

	mmap_read_lock(mm);
	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
	mmap_read_unlock(mm);
	return arg.young;
}

/*
 * Check whether the region was accessed after the last preparation
 *
 * mm	'mm_struct' for the given virtual address space
 * r	the region to be checked
 */
static void __damon_va_check_access(struct mm_struct *mm,
		struct damon_region *r, bool same_target,
		struct damon_attrs *attrs)
{
	static unsigned long last_addr;
	static unsigned long last_folio_sz = PAGE_SIZE;
	static bool last_accessed;

	if (!mm) {
		damon_update_region_access_rate(r, false, attrs);
		return;
	}

	/* If the region is in the last checked page, reuse the result */
	if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
				ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
		damon_update_region_access_rate(r, last_accessed, attrs);
		return;
	}

	last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
	damon_update_region_access_rate(r, last_accessed, attrs);

	last_addr = r->sampling_addr;
}

static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct mm_struct *mm;
	struct damon_region *r;
	unsigned int max_nr_accesses = 0;
	bool same_target;

	damon_for_each_target(t, ctx) {
		mm = damon_get_mm(t);
		same_target = false;
		damon_for_each_region(r, t) {
			__damon_va_check_access(mm, r, same_target,
					&ctx->attrs);
			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
			same_target = true;
		}
		if (mm)
			mmput(mm);
	}

	return max_nr_accesses;
}

static bool damos_va_filter_young_match(struct damos_filter *filter,
		struct folio *folio, struct vm_area_struct *vma,
		unsigned long addr, pte_t *ptep, pmd_t *pmdp)
{
	bool young = false;

	if (ptep)
		young = pte_young(ptep_get(ptep));
	else if (pmdp)
		young = pmd_young(pmdp_get(pmdp));

	young = young || !folio_test_idle(folio) ||
		mmu_notifier_test_young(vma->vm_mm, addr);

	if (young && ptep)
		damon_ptep_mkold(ptep, vma, addr);
	else if (young && pmdp)
		damon_pmdp_mkold(pmdp, vma, addr);

	return young == filter->matching;
}

static bool damos_va_filter_out(struct damos *scheme, struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr,
		pte_t *ptep, pmd_t *pmdp)
{
	struct damos_filter *filter;
	bool matched;

	if (scheme->core_filters_allowed)
		return false;

	damos_for_each_ops_filter(filter, scheme) {
		/*
		 * damos_folio_filter_match checks the young filter by doing
		 * an rmap on the folio to find its page table.  However,
		 * being the vaddr scheme, we have direct access to the page
		 * tables, so use that instead.
		 */
		if (filter->type == DAMOS_FILTER_TYPE_YOUNG)
			matched = damos_va_filter_young_match(filter, folio,
					vma, addr, ptep, pmdp);
		else
			matched = damos_folio_filter_match(filter, folio);

		if (matched)
			return !filter->allow;
	}
	return scheme->ops_filters_default_reject;
}

struct damos_va_migrate_private {
	struct list_head *migration_lists;
	struct damos *scheme;
};

/*
 * Place the given folio in the migration_list corresponding to where the folio
 * should be migrated.
 *
 * The algorithm used here is similar to weighted_interleave_nid()
 */
static void damos_va_migrate_dests_add(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr,
		struct damos_migrate_dests *dests,
		struct list_head *migration_lists)
{
	pgoff_t ilx;
	int order;
	unsigned int target;
	unsigned int weight_total = 0;
	int i;

	/*
	 * If dests is empty, there is only one migration list corresponding
	 * to s->target_nid.
	 */
	if (!dests->nr_dests) {
		i = 0;
		goto isolate;
	}

	order = folio_order(folio);
	ilx = vma->vm_pgoff >> order;
	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);

	for (i = 0; i < dests->nr_dests; i++)
		weight_total += dests->weight_arr[i];

	/* If the total weights are somehow 0, don't migrate at all */
	if (!weight_total)
		return;

	target = ilx % weight_total;
	for (i = 0; i < dests->nr_dests; i++) {
		if (target < dests->weight_arr[i])
			break;
		target -= dests->weight_arr[i];
	}

	/* If the folio is already in the right node, don't do anything */
	if (folio_nid(folio) == dests->node_id_arr[i])
		return;

isolate:
	if (!folio_isolate_lru(folio))
		return;

	list_add(&folio->lru, &migration_lists[i]);
}

static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	struct damos_va_migrate_private *priv = walk->private;
	struct list_head *migration_lists = priv->migration_lists;
	struct damos *s = priv->scheme;
	struct damos_migrate_dests *dests = &s->migrate_dests;
	struct folio *folio;
	spinlock_t *ptl;
	pte_t *start_pte, *pte, ptent;
	int nr;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, walk->vma);
	if (ptl) {
		pmd_t pmde = pmdp_get(pmd);

		if (!pmd_present(pmde))
			goto huge_out;
		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
		if (!folio)
			goto huge_out;
		if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd))
			goto huge_out;
		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
				migration_lists);
huge_out:
		spin_unlock(ptl);
		return 0;
	}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte)
		return 0;

	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
		nr = 1;
		ptent = ptep_get(pte);

		if (pte_none(ptent) || !pte_present(ptent))
			continue;
		folio = vm_normal_folio(walk->vma, addr, ptent);
		if (!folio)
			continue;
		if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL))
			continue;
		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
				migration_lists);
		nr = folio_nr_pages(folio);
	}
	pte_unmap_unlock(start_pte, ptl);
	return 0;
}

/*
 * Functions for the target validity check and cleanup
 */

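/*
 * A 'vaddr' target is valid only while the task that 't->pid' points to is
 * still alive.  Once the task has exited, damon_get_task_struct() fails and
 * the core layer can treat the target as invalid.
 */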
static bool damon_va_target_valid(struct damon_target *t)
{
	struct task_struct *task;

	task = damon_get_task_struct(t);
	if (task) {
		put_task_struct(task);
		return true;
	}

	return false;
}

static void damon_va_cleanup_target(struct damon_target *t)
{
	put_pid(t->pid);
}

#ifndef CONFIG_ADVISE_SYSCALLS
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	return 0;
}
#else
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	struct mm_struct *mm;
	unsigned long start = PAGE_ALIGN(r->ar.start);
	unsigned long len = PAGE_ALIGN(damon_sz_region(r));
	unsigned long applied;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	applied = do_madvise(mm, start, len, behavior) ? 0 : len;
	mmput(mm);

	return applied;
}
#endif /* CONFIG_ADVISE_SYSCALLS */

static unsigned long damos_va_migrate(struct damon_target *target,
		struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	LIST_HEAD(folio_list);
	struct damos_va_migrate_private priv;
	struct mm_struct *mm;
	int nr_dests;
	int nid;
	bool use_target_nid;
	unsigned long applied = 0;
	struct damos_migrate_dests *dests = &s->migrate_dests;
	struct mm_walk_ops walk_ops = {
		.pmd_entry = damos_va_migrate_pmd_entry,
		.pte_entry = NULL,
		.walk_lock = PGWALK_RDLOCK,
	};

	use_target_nid = dests->nr_dests == 0;
	nr_dests = use_target_nid ? 1 : dests->nr_dests;
	priv.scheme = s;
	priv.migration_lists = kmalloc_array(nr_dests,
			sizeof(*priv.migration_lists), GFP_KERNEL);
	if (!priv.migration_lists)
		return 0;

	for (int i = 0; i < nr_dests; i++)
		INIT_LIST_HEAD(&priv.migration_lists[i]);

	mm = damon_get_mm(target);
	if (!mm)
		goto free_lists;

	mmap_read_lock(mm);
	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
	mmap_read_unlock(mm);
	mmput(mm);

	for (int i = 0; i < nr_dests; i++) {
		nid = use_target_nid ?
			s->target_nid : dests->node_id_arr[i];
		applied += damon_migrate_pages(&priv.migration_lists[i], nid);
		cond_resched();
	}

free_lists:
	kfree(priv.migration_lists);
	return applied * PAGE_SIZE;
}

struct damos_va_stat_private {
	struct damos *scheme;
	unsigned long *sz_filter_passed;
};

static inline bool damos_va_invalid_folio(struct folio *folio,
		struct damos *s)
{
	return !folio || folio == s->last_applied;
}

static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	struct damos_va_stat_private *priv = walk->private;
	struct damos *s = priv->scheme;
	unsigned long *sz_filter_passed = priv->sz_filter_passed;
	struct vm_area_struct *vma = walk->vma;
	struct folio *folio;
	spinlock_t *ptl;
	pte_t *start_pte, *pte, ptent;
	int nr;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		pmd_t pmde = pmdp_get(pmd);

		if (!pmd_present(pmde))
			goto huge_unlock;

		folio = vm_normal_folio_pmd(vma, addr, pmde);

		if (damos_va_invalid_folio(folio, s))
			goto huge_unlock;

		if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd))
			*sz_filter_passed += folio_size(folio);
		s->last_applied = folio;

huge_unlock:
		spin_unlock(ptl);
		return 0;
	}
#endif
	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (!start_pte)
		return 0;

	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
		nr = 1;
		ptent = ptep_get(pte);

		if (pte_none(ptent) || !pte_present(ptent))
			continue;

		folio = vm_normal_folio(vma, addr, ptent);

		if (damos_va_invalid_folio(folio, s))
			continue;

		if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL))
			*sz_filter_passed += folio_size(folio);
		nr = folio_nr_pages(folio);
		s->last_applied = folio;
	}
	pte_unmap_unlock(start_pte, ptl);
	return 0;
}

static unsigned long damos_va_stat(struct damon_target *target,
		struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	struct damos_va_stat_private priv;
	struct mm_struct *mm;
	struct mm_walk_ops walk_ops = {
		.pmd_entry = damos_va_stat_pmd_entry,
		.walk_lock = PGWALK_RDLOCK,
	};

	priv.scheme = s;
	priv.sz_filter_passed = sz_filter_passed;

	if (!damos_ops_has_filter(s))
		return 0;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	mmap_read_lock(mm);
	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
	mmap_read_unlock(mm);
	mmput(mm);
	return 0;
}

static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme, unsigned long *sz_filter_passed)
{
	int madv_action;

	switch (scheme->action) {
	case DAMOS_WILLNEED:
		madv_action = MADV_WILLNEED;
		break;
	case DAMOS_COLD:
		madv_action = MADV_COLD;
		break;
	case DAMOS_PAGEOUT:
		madv_action = MADV_PAGEOUT;
		break;
	case DAMOS_HUGEPAGE:
		madv_action = MADV_HUGEPAGE;
		break;
	case DAMOS_NOHUGEPAGE:
		madv_action = MADV_NOHUGEPAGE;
		break;
	case DAMOS_MIGRATE_HOT:
	case DAMOS_MIGRATE_COLD:
		return damos_va_migrate(t, r, scheme, sz_filter_passed);
	case DAMOS_STAT:
		return damos_va_stat(t, r, scheme, sz_filter_passed);
	default:
		/*
		 * DAMOS actions that are not yet supported by 'vaddr'.
		 */
		return 0;
	}

	return damos_madvise(t, r, madv_action);
}

static int damon_va_scheme_score(struct damon_ctx *context,
		struct damon_region *r, struct damos *scheme)
{
	switch (scheme->action) {
	case DAMOS_PAGEOUT:
		return damon_cold_score(context, r, scheme);
	case DAMOS_MIGRATE_HOT:
		return damon_hot_score(context, r, scheme);
	case DAMOS_MIGRATE_COLD:
		return damon_cold_score(context, r, scheme);
	default:
		break;
	}

	return DAMOS_MAX_SCORE;
}

static int __init damon_va_initcall(void)
{
	struct damon_operations ops = {
		.id = DAMON_OPS_VADDR,
		.init = damon_va_init,
		.update = damon_va_update,
		.prepare_access_checks = damon_va_prepare_access_checks,
		.check_accesses = damon_va_check_accesses,
		.target_valid = damon_va_target_valid,
		.cleanup_target = damon_va_cleanup_target,
		.apply_scheme = damon_va_apply_scheme,
		.get_scheme_score = damon_va_scheme_score,
	};
	/* ops for fixed virtual address ranges */
	struct damon_operations ops_fvaddr = ops;
	int err;

	/* Don't set the monitoring target regions for the entire mapping */
	ops_fvaddr.id = DAMON_OPS_FVADDR;
	ops_fvaddr.init = NULL;
	ops_fvaddr.update = NULL;

	err = damon_register_ops(&ops);
	if (err)
		return err;
	return damon_register_ops(&ops_fvaddr);
}

subsys_initcall(damon_va_initcall);

#include "tests/vaddr-kunit.h"