// SPDX-License-Identifier: GPL-2.0
/*
 * DAMON Code for Virtual Address Spaces
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon-va: " fmt

#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>

#include "../internal.h"
#include "ops-common.h"

#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

/*
 * 't->pid' should be the pointer to the relevant 'struct pid' having reference
 * count.  Caller must put the returned task, unless it is NULL.
 */
static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
{
        return get_pid_task(t->pid, PIDTYPE_PID);
}

/*
 * Get the mm_struct of the given target
 *
 * Caller _must_ put the mm_struct after use, unless it is NULL.
 *
 * Returns the mm_struct of the target on success, NULL on failure
 */
static struct mm_struct *damon_get_mm(struct damon_target *t)
{
        struct task_struct *task;
        struct mm_struct *mm;

        task = damon_get_task_struct(t);
        if (!task)
                return NULL;

        mm = get_task_mm(task);
        put_task_struct(task);
        return mm;
}

/*
 * Functions for the initial monitoring target regions construction
 */

/*
 * Size-evenly split a region into 'nr_pieces' small regions
 *
 * Returns 0 on success, or negative error code otherwise.
 */
static int damon_va_evenly_split_region(struct damon_target *t,
                struct damon_region *r, unsigned int nr_pieces)
{
        unsigned long sz_orig, sz_piece, orig_end;
        struct damon_region *n = NULL, *next;
        unsigned long start;
        unsigned int i;

        if (!r || !nr_pieces)
                return -EINVAL;

        if (nr_pieces == 1)
                return 0;

        orig_end = r->ar.end;
        sz_orig = damon_sz_region(r);
        sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);

        if (!sz_piece)
                return -EINVAL;

        r->ar.end = r->ar.start + sz_piece;
        next = damon_next_region(r);
        for (start = r->ar.end, i = 1; i < nr_pieces; start += sz_piece, i++) {
                n = damon_new_region(start, start + sz_piece);
                if (!n)
                        return -ENOMEM;
                damon_insert_region(n, r, next, t);
                r = n;
        }
        /* complement last region for possible rounding error */
        if (n)
                n->ar.end = orig_end;

        return 0;
}
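
/*
 * Example (hypothetical numbers, assuming DAMON_MIN_REGION == 4096): splitting
 * the region [4096, 45056) into four pieces gives
 * sz_piece = ALIGN_DOWN(40960 / 4, 4096) = 8192, producing
 * [4096, 12288), [12288, 20480), [20480, 28672) and, after the rounding
 * complement of the last piece, [28672, 45056).
 */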

static unsigned long sz_range(struct damon_addr_range *r)
{
        return r->end - r->start;
}

/*
 * Find three regions separated by two biggest unmapped regions
 *
 * mm           'mm_struct' of the target address space
 * regions      an array of three address ranges in which the results will be
 *              saved
 *
 * This function receives an address space and finds three regions in it which
 * are separated by the two biggest unmapped regions in the space.  Please
 * refer to the comments of '__damon_va_init_regions()' below to know why this
 * is necessary.
 *
 * Returns 0 on success, or negative error code otherwise.
 */
static int __damon_va_three_regions(struct mm_struct *mm,
                                    struct damon_addr_range regions[3])
{
        struct damon_addr_range first_gap = {0}, second_gap = {0};
        VMA_ITERATOR(vmi, mm, 0);
        struct vm_area_struct *vma, *prev = NULL;
        unsigned long start;

        /*
         * Find the two biggest gaps so that first_gap > second_gap > others.
         * If this is too slow, it can be optimised to examine the maple
         * tree gaps.
         */
        rcu_read_lock();
        for_each_vma(vmi, vma) {
                unsigned long gap;

                if (!prev) {
                        start = vma->vm_start;
                        goto next;
                }
                gap = vma->vm_start - prev->vm_end;

                if (gap > sz_range(&first_gap)) {
                        second_gap = first_gap;
                        first_gap.start = prev->vm_end;
                        first_gap.end = vma->vm_start;
                } else if (gap > sz_range(&second_gap)) {
                        second_gap.start = prev->vm_end;
                        second_gap.end = vma->vm_start;
                }
next:
                prev = vma;
        }
        rcu_read_unlock();

        if (!sz_range(&second_gap) || !sz_range(&first_gap))
                return -EINVAL;

        /* Sort the two biggest gaps by address */
        if (first_gap.start > second_gap.start)
                swap(first_gap, second_gap);

        /* Store the result */
        regions[0].start = ALIGN(start, DAMON_MIN_REGION);
        regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
        regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
        regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
        regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
        regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);

        return 0;
}
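
/*
 * Worked example (hypothetical layout): with VMAs covering [0x1000, 0x3000),
 * [0x400000, 0x500000) and [0x7ffff0000, 0x7ffff8000), the two biggest gaps
 * are [0x3000, 0x400000) and [0x500000, 0x7ffff0000).  After sorting the gaps
 * by address, the resulting ranges are [0x1000, 0x3000), [0x400000, 0x500000)
 * and [0x7ffff0000, 0x7ffff8000), each boundary aligned to DAMON_MIN_REGION.
 */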

/*
 * Get the three regions in the given target (task)
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int damon_va_three_regions(struct damon_target *t,
                                  struct damon_addr_range regions[3])
{
        struct mm_struct *mm;
        int rc;

        mm = damon_get_mm(t);
        if (!mm)
                return -EINVAL;

        mmap_read_lock(mm);
        rc = __damon_va_three_regions(mm, regions);
        mmap_read_unlock(mm);

        mmput(mm);
        return rc;
}

/*
 * Initialize the monitoring target regions for the given target (task)
 *
 * t    the given target
 *
 * Because only a small number of portions of the entire address space are
 * actually mapped to memory and accessed, monitoring the unmapped regions is
 * wasteful.  That said, because we can deal with small noises, tracking every
 * mapping is not strictly required, and could even incur high overhead if the
 * mapping frequently changes or the number of mappings is high.  The adaptive
 * regions adjustment mechanism will further help to deal with the noise by
 * simply identifying the unmapped areas as regions that have no access.
 * Moreover, applying the real mappings that would have many unmapped areas
 * inside would make the adaptive mechanism quite complex.  However, too huge
 * unmapped areas inside the monitoring target should be removed to not take
 * up the time of the adaptive mechanism.
 *
 * For this reason, we convert the complex mappings to three distinct regions
 * that cover every mapped area of the address space.  The two gaps between
 * the three regions are the two biggest unmapped areas in the given address
 * space.  In detail, this function first identifies the start and the end of
 * the mappings and the two biggest unmapped areas of the address space.
 * Then, it constructs the three regions as below:
 *
 *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
 *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
 *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
 *
 * As the usual memory map of processes is as below, the gap between the heap
 * and the uppermost mmap()-ed region, and the gap between the lowermost
 * mmap()-ed region and the stack will be the two biggest unmapped regions.
 * Because these gaps are exceptionally huge in a usual address space,
 * excluding these two biggest unmapped regions will be sufficient to make a
 * reasonable trade-off.
 *
 *   <heap>
 *   <BIG UNMAPPED REGION 1>
 *   <uppermost mmap()-ed region>
 *   (other mmap()-ed regions and small unmapped regions)
 *   <lowermost mmap()-ed region>
 *   <BIG UNMAPPED REGION 2>
 *   <stack>
 */
static void __damon_va_init_regions(struct damon_ctx *ctx,
                                    struct damon_target *t)
{
        struct damon_target *ti;
        struct damon_region *r;
        struct damon_addr_range regions[3];
        unsigned long sz = 0, nr_pieces;
        int i, tidx = 0;

        if (damon_va_three_regions(t, regions)) {
                damon_for_each_target(ti, ctx) {
                        if (ti == t)
                                break;
                        tidx++;
                }
                pr_debug("Failed to get three regions of %dth target\n", tidx);
                return;
        }

        for (i = 0; i < 3; i++)
                sz += regions[i].end - regions[i].start;
        if (ctx->attrs.min_nr_regions)
                sz /= ctx->attrs.min_nr_regions;
        if (sz < DAMON_MIN_REGION)
                sz = DAMON_MIN_REGION;

        /* Set the initial three regions of the target */
        for (i = 0; i < 3; i++) {
                r = damon_new_region(regions[i].start, regions[i].end);
                if (!r) {
                        pr_err("%d'th init region creation failed\n", i);
                        return;
                }
                damon_add_region(r, t);

                nr_pieces = (regions[i].end - regions[i].start) / sz;
                damon_va_evenly_split_region(t, r, nr_pieces);
        }
}
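
/*
 * Example (hypothetical numbers): if the three ranges sum to 100 MiB and
 * ctx->attrs.min_nr_regions is 10, sz becomes 10 MiB, so a 35 MiB range is
 * split into 3 pieces while ranges smaller than 10 MiB stay as single regions
 * (nr_pieces of 1 makes damon_va_evenly_split_region() a no-op and 0 makes it
 * return -EINVAL; the return value is intentionally not checked here).
 */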

/* Initialize '->regions_list' of every target (task) */
static void damon_va_init(struct damon_ctx *ctx)
{
        struct damon_target *t;

        damon_for_each_target(t, ctx) {
                /* the user may set the target regions as they want */
                if (!damon_nr_regions(t))
                        __damon_va_init_regions(ctx, t);
        }
}

/*
 * Update regions for current memory mappings
 */
static void damon_va_update(struct damon_ctx *ctx)
{
        struct damon_addr_range three_regions[3];
        struct damon_target *t;

        damon_for_each_target(t, ctx) {
                if (damon_va_three_regions(t, three_regions))
                        continue;
                damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION);
        }
}

static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
                unsigned long next, struct mm_walk *walk)
{
        pte_t *pte;
        pmd_t pmde;
        spinlock_t *ptl;

        if (pmd_trans_huge(pmdp_get(pmd))) {
                ptl = pmd_lock(walk->mm, pmd);
                pmde = pmdp_get(pmd);

                if (!pmd_present(pmde)) {
                        spin_unlock(ptl);
                        return 0;
                }

                if (pmd_trans_huge(pmde)) {
                        damon_pmdp_mkold(pmd, walk->vma, addr);
                        spin_unlock(ptl);
                        return 0;
                }
                spin_unlock(ptl);
        }

        pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        if (!pte)
                return 0;
        if (!pte_present(ptep_get(pte)))
                goto out;
        damon_ptep_mkold(pte, walk->vma, addr);
out:
        pte_unmap_unlock(pte, ptl);
        return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long addr)
{
        bool referenced = false;
        pte_t entry = huge_ptep_get(mm, addr, pte);
        struct folio *folio = pfn_folio(pte_pfn(entry));
        unsigned long psize = huge_page_size(hstate_vma(vma));

        folio_get(folio);

        if (pte_young(entry)) {
                referenced = true;
                entry = pte_mkold(entry);
                set_huge_pte_at(mm, addr, pte, entry, psize);
        }

        if (mmu_notifier_clear_young(mm, addr,
                        addr + huge_page_size(hstate_vma(vma))))
                referenced = true;

        if (referenced)
                folio_set_young(folio);

        folio_set_idle(folio);
        folio_put(folio);
}

static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end,
                struct mm_walk *walk)
{
        struct hstate *h = hstate_vma(walk->vma);
        spinlock_t *ptl;
        pte_t entry;

        ptl = huge_pte_lock(h, walk->mm, pte);
        entry = huge_ptep_get(walk->mm, addr, pte);
        if (!pte_present(entry))
                goto out;

        damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);

out:
        spin_unlock(ptl);
        return 0;
}
#else
#define damon_mkold_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_mkold_ops = {
        .pmd_entry = damon_mkold_pmd_entry,
        .hugetlb_entry = damon_mkold_hugetlb_entry,
        .walk_lock = PGWALK_RDLOCK,
};

static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
{
        mmap_read_lock(mm);
        walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
        mmap_read_unlock(mm);
}

/*
 * Functions for the access checking of the regions
 */

static void __damon_va_prepare_access_check(struct mm_struct *mm,
                struct damon_region *r)
{
        r->sampling_addr = damon_rand(r->ar.start, r->ar.end);

        damon_va_mkold(mm, r->sampling_addr);
}

static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
{
        struct damon_target *t;
        struct mm_struct *mm;
        struct damon_region *r;

        damon_for_each_target(t, ctx) {
                mm = damon_get_mm(t);
                if (!mm)
                        continue;
                damon_for_each_region(r, t)
                        __damon_va_prepare_access_check(mm, r);
                mmput(mm);
        }
}
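
/*
 * Note (illustrative, hypothetical addresses): the preparation above ages
 * only a single sampling address per region.  For a region covering
 * [0x7f0000000000, 0x7f0000200000), one random page in that range gets its
 * Accessed bit (and secondary MMU young state) cleared, and the later check
 * reports the whole region as accessed only if that sampled page was
 * referenced again.
 */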

struct damon_young_walk_private {
        /* size of the folio for the access checked virtual memory address */
        unsigned long *folio_sz;
        bool young;
};

static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
                unsigned long next, struct mm_walk *walk)
{
        pte_t *pte;
        pte_t ptent;
        spinlock_t *ptl;
        struct folio *folio;
        struct damon_young_walk_private *priv = walk->private;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        if (pmd_trans_huge(pmdp_get(pmd))) {
                pmd_t pmde;

                ptl = pmd_lock(walk->mm, pmd);
                pmde = pmdp_get(pmd);

                if (!pmd_present(pmde)) {
                        spin_unlock(ptl);
                        return 0;
                }

                if (!pmd_trans_huge(pmde)) {
                        spin_unlock(ptl);
                        goto regular_page;
                }
                folio = damon_get_folio(pmd_pfn(pmde));
                if (!folio)
                        goto huge_out;
                if (pmd_young(pmde) || !folio_test_idle(folio) ||
                    mmu_notifier_test_young(walk->mm, addr))
                        priv->young = true;
                *priv->folio_sz = HPAGE_PMD_SIZE;
                folio_put(folio);
huge_out:
                spin_unlock(ptl);
                return 0;
        }

regular_page:
#endif  /* CONFIG_TRANSPARENT_HUGEPAGE */

        pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        if (!pte)
                return 0;
        ptent = ptep_get(pte);
        if (!pte_present(ptent))
                goto out;
        folio = damon_get_folio(pte_pfn(ptent));
        if (!folio)
                goto out;
        if (pte_young(ptent) || !folio_test_idle(folio) ||
            mmu_notifier_test_young(walk->mm, addr))
                priv->young = true;
        *priv->folio_sz = folio_size(folio);
        folio_put(folio);
out:
        pte_unmap_unlock(pte, ptl);
        return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end,
                struct mm_walk *walk)
{
        struct damon_young_walk_private *priv = walk->private;
        struct hstate *h = hstate_vma(walk->vma);
        struct folio *folio;
        spinlock_t *ptl;
        pte_t entry;

        ptl = huge_pte_lock(h, walk->mm, pte);
        entry = huge_ptep_get(walk->mm, addr, pte);
        if (!pte_present(entry))
                goto out;

        folio = pfn_folio(pte_pfn(entry));
        folio_get(folio);

        if (pte_young(entry) || !folio_test_idle(folio) ||
            mmu_notifier_test_young(walk->mm, addr))
                priv->young = true;
        *priv->folio_sz = huge_page_size(h);

        folio_put(folio);

out:
        spin_unlock(ptl);
        return 0;
}
#else
#define damon_young_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_young_ops = {
        .pmd_entry = damon_young_pmd_entry,
        .hugetlb_entry = damon_young_hugetlb_entry,
        .walk_lock = PGWALK_RDLOCK,
};

static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
                unsigned long *folio_sz)
{
        struct damon_young_walk_private arg = {
                .folio_sz = folio_sz,
                .young = false,
        };

        mmap_read_lock(mm);
        walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
        mmap_read_unlock(mm);
        return arg.young;
}

/*
 * Check whether the region was accessed after the last preparation
 *
 * mm           'mm_struct' for the given virtual address space
 * r            the region to be checked
 * same_target  whether the region belongs to the same target as the
 *              previously checked region
 * attrs        monitoring attributes for updating the access rate
 */
static void __damon_va_check_access(struct mm_struct *mm,
                                struct damon_region *r, bool same_target,
                                struct damon_attrs *attrs)
{
        static unsigned long last_addr;
        static unsigned long last_folio_sz = PAGE_SIZE;
        static bool last_accessed;

        if (!mm) {
                damon_update_region_access_rate(r, false, attrs);
                return;
        }

        /* If the region is in the last checked page, reuse the result */
        if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
                                ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
                damon_update_region_access_rate(r, last_accessed, attrs);
                return;
        }

        last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
        damon_update_region_access_rate(r, last_accessed, attrs);

        last_addr = r->sampling_addr;
}

static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
{
        struct damon_target *t;
        struct mm_struct *mm;
        struct damon_region *r;
        unsigned int max_nr_accesses = 0;
        bool same_target;

        damon_for_each_target(t, ctx) {
                mm = damon_get_mm(t);
                same_target = false;
                damon_for_each_region(r, t) {
                        __damon_va_check_access(mm, r, same_target,
                                        &ctx->attrs);
                        max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
                        same_target = true;
                }
                if (mm)
                        mmput(mm);
        }

        return max_nr_accesses;
}
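
/*
 * Example of the result reuse above (hypothetical addresses): if two
 * consecutive regions of the same target sample 0x200100000 and 0x200180000
 * and the first check found the address mapped by a 2 MiB THP, both addresses
 * align down to the same 2 MiB boundary, so the second call reuses
 * 'last_accessed' instead of walking the page table again.
 */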

static bool damos_va_filter_young_match(struct damos_filter *filter,
                struct folio *folio, struct vm_area_struct *vma,
                unsigned long addr, pte_t *ptep, pmd_t *pmdp)
{
        bool young = false;

        if (ptep)
                young = pte_young(ptep_get(ptep));
        else if (pmdp)
                young = pmd_young(pmdp_get(pmdp));

        young = young || !folio_test_idle(folio) ||
                mmu_notifier_test_young(vma->vm_mm, addr);

        if (young && ptep)
                damon_ptep_mkold(ptep, vma, addr);
        else if (young && pmdp)
                damon_pmdp_mkold(pmdp, vma, addr);

        return young == filter->matching;
}

static bool damos_va_filter_out(struct damos *scheme, struct folio *folio,
                struct vm_area_struct *vma, unsigned long addr,
                pte_t *ptep, pmd_t *pmdp)
{
        struct damos_filter *filter;
        bool matched;

        if (scheme->core_filters_allowed)
                return false;

        damos_for_each_ops_filter(filter, scheme) {
                /*
                 * damos_folio_filter_match() checks the young filter by doing
                 * an rmap on the folio to find its page table.  However,
                 * since this is the vaddr scheme, we have direct access to
                 * the page tables, so use those instead.
                 */
                if (filter->type == DAMOS_FILTER_TYPE_YOUNG)
                        matched = damos_va_filter_young_match(filter, folio,
                                        vma, addr, ptep, pmdp);
                else
                        matched = damos_folio_filter_match(filter, folio);

                if (matched)
                        return !filter->allow;
        }
        return scheme->ops_filters_default_reject;
}
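
/*
 * Example of the filter semantics above (hypothetical configuration): filters
 * are evaluated in installation order and the first match decides.  With an
 * "allow" filter for young folios followed by a "reject" filter for anonymous
 * folios, a young anonymous folio passes because the young filter matches
 * first; folios matching no filter fall back to ops_filters_default_reject.
 */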

struct damos_va_migrate_private {
        struct list_head *migration_lists;
        struct damos *scheme;
};

/*
 * Place the given folio in the migration_list corresponding to where the
 * folio should be migrated.
 *
 * The algorithm used here is similar to weighted_interleave_nid()
 */
static void damos_va_migrate_dests_add(struct folio *folio,
                struct vm_area_struct *vma, unsigned long addr,
                struct damos_migrate_dests *dests,
                struct list_head *migration_lists)
{
        pgoff_t ilx;
        int order;
        unsigned int target;
        unsigned int weight_total = 0;
        int i;

        /*
         * If dests is empty, there is only one migration list corresponding
         * to s->target_nid.
         */
        if (!dests->nr_dests) {
                i = 0;
                goto isolate;
        }

        order = folio_order(folio);
        ilx = vma->vm_pgoff >> order;
        ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);

        for (i = 0; i < dests->nr_dests; i++)
                weight_total += dests->weight_arr[i];

        /* If the total weights are somehow 0, don't migrate at all */
        if (!weight_total)
                return;

        target = ilx % weight_total;
        for (i = 0; i < dests->nr_dests; i++) {
                if (target < dests->weight_arr[i])
                        break;
                target -= dests->weight_arr[i];
        }

        /* If the folio is already in the right node, don't do anything */
        if (folio_nid(folio) == dests->node_id_arr[i])
                return;

isolate:
        if (!folio_isolate_lru(folio))
                return;

        list_add(&folio->lru, &migration_lists[i]);
}
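
/*
 * Worked example of the weighted selection above (hypothetical weights): with
 * node_id_arr = {0, 1} and weight_arr = {3, 1}, weight_total is 4 and
 * target = ilx % 4, so folios whose interleave index maps to 0..2 are queued
 * for node 0 and those mapping to 3 for node 1, i.e. roughly a 3:1 split
 * across the destinations.
 */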

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr,
                unsigned long next, struct mm_walk *walk)
{
        struct damos_va_migrate_private *priv = walk->private;
        struct list_head *migration_lists = priv->migration_lists;
        struct damos *s = priv->scheme;
        struct damos_migrate_dests *dests = &s->migrate_dests;
        struct folio *folio;
        spinlock_t *ptl;
        pmd_t pmde;

        ptl = pmd_lock(walk->mm, pmd);
        pmde = pmdp_get(pmd);

        if (!pmd_present(pmde) || !pmd_trans_huge(pmde))
                goto unlock;

        /* Tell page walk code to not split the PMD */
        walk->action = ACTION_CONTINUE;

        folio = damon_get_folio(pmd_pfn(pmde));
        if (!folio)
                goto unlock;

        if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd))
                goto put_folio;

        damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
                                   migration_lists);

put_folio:
        folio_put(folio);
unlock:
        spin_unlock(ptl);
        return 0;
}
#else
#define damos_va_migrate_pmd_entry NULL
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static int damos_va_migrate_pte_entry(pte_t *pte, unsigned long addr,
                unsigned long next, struct mm_walk *walk)
{
        struct damos_va_migrate_private *priv = walk->private;
        struct list_head *migration_lists = priv->migration_lists;
        struct damos *s = priv->scheme;
        struct damos_migrate_dests *dests = &s->migrate_dests;
        struct folio *folio;
        pte_t ptent;

        ptent = ptep_get(pte);
        if (pte_none(ptent) || !pte_present(ptent))
                return 0;

        folio = damon_get_folio(pte_pfn(ptent));
        if (!folio)
                return 0;

        if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL))
                goto put_folio;

        damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
                                   migration_lists);

put_folio:
        folio_put(folio);
        return 0;
}

/*
 * Functions for the target validity check and cleanup
 */

static bool damon_va_target_valid(struct damon_target *t)
{
        struct task_struct *task;

        task = damon_get_task_struct(t);
        if (task) {
                put_task_struct(task);
                return true;
        }

        return false;
}

static void damon_va_cleanup_target(struct damon_target *t)
{
        put_pid(t->pid);
}

#ifndef CONFIG_ADVISE_SYSCALLS
static unsigned long damos_madvise(struct damon_target *target,
                struct damon_region *r, int behavior)
{
        return 0;
}
#else
static unsigned long damos_madvise(struct damon_target *target,
                struct damon_region *r, int behavior)
{
        struct mm_struct *mm;
        unsigned long start = PAGE_ALIGN(r->ar.start);
        unsigned long len = PAGE_ALIGN(damon_sz_region(r));
        unsigned long applied;

        mm = damon_get_mm(target);
        if (!mm)
                return 0;

        applied = do_madvise(mm, start, len, behavior) ? 0 : len;
        mmput(mm);

        return applied;
}
#endif  /* CONFIG_ADVISE_SYSCALLS */

static unsigned long damos_va_migrate(struct damon_target *target,
                struct damon_region *r, struct damos *s,
                unsigned long *sz_filter_passed)
{
        LIST_HEAD(folio_list);
        struct damos_va_migrate_private priv;
        struct mm_struct *mm;
        int nr_dests;
        int nid;
        bool use_target_nid;
        unsigned long applied = 0;
        struct damos_migrate_dests *dests = &s->migrate_dests;
        struct mm_walk_ops walk_ops = {
                .pmd_entry = damos_va_migrate_pmd_entry,
                .pte_entry = damos_va_migrate_pte_entry,
                .walk_lock = PGWALK_RDLOCK,
        };

        use_target_nid = dests->nr_dests == 0;
        nr_dests = use_target_nid ? 1 : dests->nr_dests;
        priv.scheme = s;
        priv.migration_lists = kmalloc_array(nr_dests,
                        sizeof(*priv.migration_lists), GFP_KERNEL);
        if (!priv.migration_lists)
                return 0;

        for (int i = 0; i < nr_dests; i++)
                INIT_LIST_HEAD(&priv.migration_lists[i]);

        mm = damon_get_mm(target);
        if (!mm)
                goto free_lists;

        mmap_read_lock(mm);
        walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
        mmap_read_unlock(mm);
        mmput(mm);

        for (int i = 0; i < nr_dests; i++) {
                nid = use_target_nid ? s->target_nid : dests->node_id_arr[i];
                applied += damon_migrate_pages(&priv.migration_lists[i], nid);
                cond_resched();
        }

free_lists:
        kfree(priv.migration_lists);
        return applied * PAGE_SIZE;
}
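
/*
 * Note: the action helpers above report "applied" amounts in bytes:
 * damos_madvise() returns the madvise()d length on success, and
 * damos_va_migrate() converts its per-destination page counts with
 * applied * PAGE_SIZE, so both feed the scheme statistics consistently.
 */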

struct damos_va_stat_private {
        struct damos *scheme;
        unsigned long *sz_filter_passed;
};

static inline bool damos_va_invalid_folio(struct folio *folio,
                struct damos *s)
{
        return !folio || folio == s->last_applied;
}

static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
                unsigned long next, struct mm_walk *walk)
{
        struct damos_va_stat_private *priv = walk->private;
        struct damos *s = priv->scheme;
        unsigned long *sz_filter_passed = priv->sz_filter_passed;
        struct vm_area_struct *vma = walk->vma;
        struct folio *folio;
        spinlock_t *ptl;
        pte_t *start_pte, *pte, ptent;
        int nr;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        if (pmd_trans_huge(*pmd)) {
                pmd_t pmde;

                ptl = pmd_trans_huge_lock(pmd, vma);
                if (!ptl)
                        return 0;
                pmde = pmdp_get(pmd);
                if (!pmd_present(pmde))
                        goto huge_unlock;

                folio = vm_normal_folio_pmd(vma, addr, pmde);

                if (damos_va_invalid_folio(folio, s))
                        goto huge_unlock;

                if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd))
                        *sz_filter_passed += folio_size(folio);
                s->last_applied = folio;

huge_unlock:
                spin_unlock(ptl);
                return 0;
        }
#endif
        start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        if (!start_pte)
                return 0;

        for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
                nr = 1;
                ptent = ptep_get(pte);

                if (pte_none(ptent) || !pte_present(ptent))
                        continue;

                folio = vm_normal_folio(vma, addr, ptent);

                if (damos_va_invalid_folio(folio, s))
                        continue;

                if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL))
                        *sz_filter_passed += folio_size(folio);
                nr = folio_nr_pages(folio);
                s->last_applied = folio;
        }
        pte_unmap_unlock(start_pte, ptl);
        return 0;
}

static unsigned long damos_va_stat(struct damon_target *target,
                struct damon_region *r, struct damos *s,
                unsigned long *sz_filter_passed)
{
        struct damos_va_stat_private priv;
        struct mm_struct *mm;
        struct mm_walk_ops walk_ops = {
                .pmd_entry = damos_va_stat_pmd_entry,
                .walk_lock = PGWALK_RDLOCK,
        };

        priv.scheme = s;
        priv.sz_filter_passed = sz_filter_passed;

        if (!damos_ops_has_filter(s))
                return 0;

        mm = damon_get_mm(target);
        if (!mm)
                return 0;

        mmap_read_lock(mm);
        walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
        mmap_read_unlock(mm);
        mmput(mm);
        return 0;
}

static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
                struct damon_target *t, struct damon_region *r,
                struct damos *scheme, unsigned long *sz_filter_passed)
{
        int madv_action;

        switch (scheme->action) {
        case DAMOS_WILLNEED:
                madv_action = MADV_WILLNEED;
                break;
        case DAMOS_COLD:
                madv_action = MADV_COLD;
                break;
        case DAMOS_PAGEOUT:
                madv_action = MADV_PAGEOUT;
                break;
        case DAMOS_HUGEPAGE:
                madv_action = MADV_HUGEPAGE;
                break;
        case DAMOS_NOHUGEPAGE:
                madv_action = MADV_NOHUGEPAGE;
                break;
        case DAMOS_MIGRATE_HOT:
        case DAMOS_MIGRATE_COLD:
                return damos_va_migrate(t, r, scheme, sz_filter_passed);
        case DAMOS_STAT:
                return damos_va_stat(t, r, scheme, sz_filter_passed);
        default:
                /*
                 * DAMOS actions that are not yet supported by 'vaddr'.
                 */
                return 0;
        }

        return damos_madvise(t, r, madv_action);
}

static int damon_va_scheme_score(struct damon_ctx *context,
                struct damon_target *t, struct damon_region *r,
                struct damos *scheme)
{
        switch (scheme->action) {
        case DAMOS_PAGEOUT:
                return damon_cold_score(context, r, scheme);
        case DAMOS_MIGRATE_HOT:
                return damon_hot_score(context, r, scheme);
        case DAMOS_MIGRATE_COLD:
                return damon_cold_score(context, r, scheme);
        default:
                break;
        }

        return DAMOS_MAX_SCORE;
}

static int __init damon_va_initcall(void)
{
        struct damon_operations ops = {
                .id = DAMON_OPS_VADDR,
                .init = damon_va_init,
                .update = damon_va_update,
                .prepare_access_checks = damon_va_prepare_access_checks,
                .check_accesses = damon_va_check_accesses,
                .target_valid = damon_va_target_valid,
                .cleanup_target = damon_va_cleanup_target,
                .cleanup = NULL,
                .apply_scheme = damon_va_apply_scheme,
                .get_scheme_score = damon_va_scheme_score,
        };
        /* ops for fixed virtual address ranges */
        struct damon_operations ops_fvaddr = ops;
        int err;

        /* Don't set the monitoring target regions for the entire mapping */
        ops_fvaddr.id = DAMON_OPS_FVADDR;
        ops_fvaddr.init = NULL;
        ops_fvaddr.update = NULL;

        err = damon_register_ops(&ops);
        if (err)
                return err;
        return damon_register_ops(&ops_fvaddr);
}

subsys_initcall(damon_va_initcall);

#include "tests/vaddr-kunit.h"