1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * DAMON Code for Virtual Address Spaces 4 * 5 * Author: SeongJae Park <sj@kernel.org> 6 */ 7 8 #define pr_fmt(fmt) "damon-va: " fmt 9 10 #include <linux/highmem.h> 11 #include <linux/hugetlb.h> 12 #include <linux/mman.h> 13 #include <linux/mmu_notifier.h> 14 #include <linux/page_idle.h> 15 #include <linux/pagewalk.h> 16 #include <linux/sched/mm.h> 17 18 #include "../internal.h" 19 #include "ops-common.h" 20 21 #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST 22 #undef DAMON_MIN_REGION_SZ 23 #define DAMON_MIN_REGION_SZ 1 24 #endif 25 26 /* 27 * 't->pid' should be the pointer to the relevant 'struct pid' having reference 28 * count. Caller must put the returned task, unless it is NULL. 29 */ 30 static inline struct task_struct *damon_get_task_struct(struct damon_target *t) 31 { 32 return get_pid_task(t->pid, PIDTYPE_PID); 33 } 34 35 /* 36 * Get the mm_struct of the given target 37 * 38 * Caller _must_ put the mm_struct after use, unless it is NULL. 39 * 40 * Returns the mm_struct of the target on success, NULL on failure 41 */ 42 static struct mm_struct *damon_get_mm(struct damon_target *t) 43 { 44 struct task_struct *task; 45 struct mm_struct *mm; 46 47 task = damon_get_task_struct(t); 48 if (!task) 49 return NULL; 50 51 mm = get_task_mm(task); 52 put_task_struct(task); 53 return mm; 54 } 55 56 static unsigned long sz_range(struct damon_addr_range *r) 57 { 58 return r->end - r->start; 59 } 60 61 /* 62 * Find three regions separated by two biggest unmapped regions 63 * 64 * vma the head vma of the target address space 65 * regions an array of three address ranges that results will be saved 66 * 67 * This function receives an address space and finds three regions in it which 68 * separated by the two biggest unmapped regions in the space. Please refer to 69 * below comments of '__damon_va_init_regions()' function to know why this is 70 * necessary. 71 * 72 * Returns 0 if success, or negative error code otherwise. 73 */ 74 static int __damon_va_three_regions(struct mm_struct *mm, 75 struct damon_addr_range regions[3]) 76 { 77 struct damon_addr_range first_gap = {0}, second_gap = {0}; 78 VMA_ITERATOR(vmi, mm, 0); 79 struct vm_area_struct *vma, *prev = NULL; 80 unsigned long start; 81 82 /* 83 * Find the two biggest gaps so that first_gap > second_gap > others. 84 * If this is too slow, it can be optimised to examine the maple 85 * tree gaps. 86 */ 87 rcu_read_lock(); 88 for_each_vma(vmi, vma) { 89 unsigned long gap; 90 91 if (!prev) { 92 start = vma->vm_start; 93 goto next; 94 } 95 gap = vma->vm_start - prev->vm_end; 96 97 if (gap > sz_range(&first_gap)) { 98 second_gap = first_gap; 99 first_gap.start = prev->vm_end; 100 first_gap.end = vma->vm_start; 101 } else if (gap > sz_range(&second_gap)) { 102 second_gap.start = prev->vm_end; 103 second_gap.end = vma->vm_start; 104 } 105 next: 106 prev = vma; 107 } 108 rcu_read_unlock(); 109 110 if (!sz_range(&second_gap) || !sz_range(&first_gap)) 111 return -EINVAL; 112 113 /* Sort the two biggest gaps by address */ 114 if (first_gap.start > second_gap.start) 115 swap(first_gap, second_gap); 116 117 /* Store the result */ 118 regions[0].start = ALIGN(start, DAMON_MIN_REGION_SZ); 119 regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION_SZ); 120 regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION_SZ); 121 regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION_SZ); 122 regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION_SZ); 123 regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION_SZ); 124 125 return 0; 126 } 127 128 /* 129 * Get the three regions in the given target (task) 130 * 131 * Returns 0 on success, negative error code otherwise. 132 */ 133 static int damon_va_three_regions(struct damon_target *t, 134 struct damon_addr_range regions[3]) 135 { 136 struct mm_struct *mm; 137 int rc; 138 139 mm = damon_get_mm(t); 140 if (!mm) 141 return -EINVAL; 142 143 mmap_read_lock(mm); 144 rc = __damon_va_three_regions(mm, regions); 145 mmap_read_unlock(mm); 146 147 mmput(mm); 148 return rc; 149 } 150 151 /* 152 * Initialize the monitoring target regions for the given target (task) 153 * 154 * t the given target 155 * 156 * Because only a number of small portions of the entire address space 157 * is actually mapped to the memory and accessed, monitoring the unmapped 158 * regions is wasteful. That said, because we can deal with small noises, 159 * tracking every mapping is not strictly required but could even incur a high 160 * overhead if the mapping frequently changes or the number of mappings is 161 * high. The adaptive regions adjustment mechanism will further help to deal 162 * with the noise by simply identifying the unmapped areas as a region that 163 * has no access. Moreover, applying the real mappings that would have many 164 * unmapped areas inside will make the adaptive mechanism quite complex. That 165 * said, too huge unmapped areas inside the monitoring target should be removed 166 * to not take the time for the adaptive mechanism. 167 * 168 * For the reason, we convert the complex mappings to three distinct regions 169 * that cover every mapped area of the address space. Also the two gaps 170 * between the three regions are the two biggest unmapped areas in the given 171 * address space. In detail, this function first identifies the start and the 172 * end of the mappings and the two biggest unmapped areas of the address space. 173 * Then, it constructs the three regions as below: 174 * 175 * [mappings[0]->start, big_two_unmapped_areas[0]->start) 176 * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) 177 * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) 178 * 179 * As usual memory map of processes is as below, the gap between the heap and 180 * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed 181 * region and the stack will be two biggest unmapped regions. Because these 182 * gaps are exceptionally huge areas in usual address space, excluding these 183 * two biggest unmapped regions will be sufficient to make a trade-off. 184 * 185 * <heap> 186 * <BIG UNMAPPED REGION 1> 187 * <uppermost mmap()-ed region> 188 * (other mmap()-ed regions and small unmapped regions) 189 * <lowermost mmap()-ed region> 190 * <BIG UNMAPPED REGION 2> 191 * <stack> 192 */ 193 static void __damon_va_init_regions(struct damon_ctx *ctx, 194 struct damon_target *t) 195 { 196 struct damon_target *ti; 197 struct damon_addr_range regions[3]; 198 int tidx = 0; 199 200 if (damon_va_three_regions(t, regions)) { 201 damon_for_each_target(ti, ctx) { 202 if (ti == t) 203 break; 204 tidx++; 205 } 206 pr_debug("Failed to get three regions of %dth target\n", tidx); 207 return; 208 } 209 210 damon_set_regions(t, regions, 3, DAMON_MIN_REGION_SZ); 211 } 212 213 /* Initialize '->regions_list' of every target (task) */ 214 static void damon_va_init(struct damon_ctx *ctx) 215 { 216 struct damon_target *t; 217 218 damon_for_each_target(t, ctx) { 219 /* the user may set the target regions as they want */ 220 if (!damon_nr_regions(t)) 221 __damon_va_init_regions(ctx, t); 222 } 223 } 224 225 /* 226 * Update regions for current memory mappings 227 */ 228 static void damon_va_update(struct damon_ctx *ctx) 229 { 230 struct damon_addr_range three_regions[3]; 231 struct damon_target *t; 232 233 damon_for_each_target(t, ctx) { 234 if (damon_va_three_regions(t, three_regions)) 235 continue; 236 damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION_SZ); 237 } 238 } 239 240 static void damon_va_walk_page_range(struct mm_struct *mm, unsigned long start, 241 unsigned long end, struct mm_walk_ops *ops, void *private) 242 { 243 struct vm_area_struct *vma; 244 245 vma = lock_vma_under_rcu(mm, start); 246 if (!vma) 247 goto lock_mmap; 248 249 if (end > vma->vm_end) { 250 vma_end_read(vma); 251 goto lock_mmap; 252 } 253 254 if (!(vma->vm_flags & VM_PFNMAP)) { 255 ops->walk_lock = PGWALK_VMA_RDLOCK_VERIFY; 256 walk_page_range_vma(vma, start, end, ops, private); 257 } 258 259 vma_end_read(vma); 260 return; 261 262 lock_mmap: 263 mmap_read_lock(mm); 264 ops->walk_lock = PGWALK_RDLOCK; 265 walk_page_range(mm, start, end, ops, private); 266 mmap_read_unlock(mm); 267 } 268 269 static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, 270 unsigned long next, struct mm_walk *walk) 271 { 272 pte_t *pte; 273 spinlock_t *ptl; 274 275 ptl = pmd_trans_huge_lock(pmd, walk->vma); 276 if (ptl) { 277 pmd_t pmde = pmdp_get(pmd); 278 279 if (pmd_present(pmde)) 280 damon_pmdp_mkold(pmd, walk->vma, addr); 281 spin_unlock(ptl); 282 return 0; 283 } 284 285 pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 286 if (!pte) 287 return 0; 288 if (!pte_present(ptep_get(pte))) 289 goto out; 290 damon_ptep_mkold(pte, walk->vma, addr); 291 out: 292 pte_unmap_unlock(pte, ptl); 293 return 0; 294 } 295 296 #ifdef CONFIG_HUGETLB_PAGE 297 static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, 298 struct vm_area_struct *vma, unsigned long addr) 299 { 300 bool referenced = false; 301 pte_t entry = huge_ptep_get(mm, addr, pte); 302 struct folio *folio = pfn_folio(pte_pfn(entry)); 303 unsigned long psize = huge_page_size(hstate_vma(vma)); 304 305 folio_get(folio); 306 307 if (pte_young(entry)) { 308 referenced = true; 309 entry = pte_mkold(entry); 310 set_huge_pte_at(mm, addr, pte, entry, psize); 311 } 312 313 if (mmu_notifier_clear_young(mm, addr, 314 addr + huge_page_size(hstate_vma(vma)))) 315 referenced = true; 316 317 if (referenced) 318 folio_set_young(folio); 319 320 folio_set_idle(folio); 321 folio_put(folio); 322 } 323 324 static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, 325 unsigned long addr, unsigned long end, 326 struct mm_walk *walk) 327 { 328 struct hstate *h = hstate_vma(walk->vma); 329 spinlock_t *ptl; 330 pte_t entry; 331 332 ptl = huge_pte_lock(h, walk->mm, pte); 333 entry = huge_ptep_get(walk->mm, addr, pte); 334 if (!pte_present(entry)) 335 goto out; 336 337 damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr); 338 339 out: 340 spin_unlock(ptl); 341 return 0; 342 } 343 #else 344 #define damon_mkold_hugetlb_entry NULL 345 #endif /* CONFIG_HUGETLB_PAGE */ 346 347 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) 348 { 349 struct mm_walk_ops damon_mkold_ops = { 350 .pmd_entry = damon_mkold_pmd_entry, 351 .hugetlb_entry = damon_mkold_hugetlb_entry, 352 }; 353 354 damon_va_walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); 355 } 356 357 /* 358 * Functions for the access checking of the regions 359 */ 360 361 static void __damon_va_prepare_access_check(struct mm_struct *mm, 362 struct damon_region *r, 363 struct damon_ctx *ctx) 364 { 365 r->sampling_addr = damon_rand(ctx, r->ar.start, r->ar.end); 366 367 damon_va_mkold(mm, r->sampling_addr); 368 } 369 370 static void damon_va_prepare_access_checks(struct damon_ctx *ctx) 371 { 372 struct damon_target *t; 373 struct mm_struct *mm; 374 struct damon_region *r; 375 376 damon_for_each_target(t, ctx) { 377 mm = damon_get_mm(t); 378 if (!mm) 379 continue; 380 damon_for_each_region(r, t) 381 __damon_va_prepare_access_check(mm, r, ctx); 382 mmput(mm); 383 } 384 } 385 386 struct damon_young_walk_private { 387 /* size of the folio for the access checked virtual memory address */ 388 unsigned long *folio_sz; 389 bool young; 390 }; 391 392 static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, 393 unsigned long next, struct mm_walk *walk) 394 { 395 pte_t *pte; 396 pte_t ptent; 397 spinlock_t *ptl; 398 struct folio *folio; 399 struct damon_young_walk_private *priv = walk->private; 400 401 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 402 ptl = pmd_trans_huge_lock(pmd, walk->vma); 403 if (ptl) { 404 pmd_t pmde = pmdp_get(pmd); 405 406 if (!pmd_present(pmde)) 407 goto huge_out; 408 folio = vm_normal_folio_pmd(walk->vma, addr, pmde); 409 if (!folio) 410 goto huge_out; 411 if (pmd_young(pmde) || !folio_test_idle(folio) || 412 mmu_notifier_test_young(walk->mm, 413 addr)) 414 priv->young = true; 415 *priv->folio_sz = HPAGE_PMD_SIZE; 416 huge_out: 417 spin_unlock(ptl); 418 return 0; 419 } 420 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 421 422 pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 423 if (!pte) 424 return 0; 425 ptent = ptep_get(pte); 426 if (!pte_present(ptent)) 427 goto out; 428 folio = vm_normal_folio(walk->vma, addr, ptent); 429 if (!folio) 430 goto out; 431 if (pte_young(ptent) || !folio_test_idle(folio) || 432 mmu_notifier_test_young(walk->mm, addr)) 433 priv->young = true; 434 *priv->folio_sz = folio_size(folio); 435 out: 436 pte_unmap_unlock(pte, ptl); 437 return 0; 438 } 439 440 #ifdef CONFIG_HUGETLB_PAGE 441 static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, 442 unsigned long addr, unsigned long end, 443 struct mm_walk *walk) 444 { 445 struct damon_young_walk_private *priv = walk->private; 446 struct hstate *h = hstate_vma(walk->vma); 447 struct folio *folio; 448 spinlock_t *ptl; 449 pte_t entry; 450 451 ptl = huge_pte_lock(h, walk->mm, pte); 452 entry = huge_ptep_get(walk->mm, addr, pte); 453 if (!pte_present(entry)) 454 goto out; 455 456 folio = pfn_folio(pte_pfn(entry)); 457 folio_get(folio); 458 459 if (pte_young(entry) || !folio_test_idle(folio) || 460 mmu_notifier_test_young(walk->mm, addr)) 461 priv->young = true; 462 *priv->folio_sz = huge_page_size(h); 463 464 folio_put(folio); 465 466 out: 467 spin_unlock(ptl); 468 return 0; 469 } 470 #else 471 #define damon_young_hugetlb_entry NULL 472 #endif /* CONFIG_HUGETLB_PAGE */ 473 474 static bool damon_va_young(struct mm_struct *mm, unsigned long addr, 475 unsigned long *folio_sz) 476 { 477 struct damon_young_walk_private arg = { 478 .folio_sz = folio_sz, 479 .young = false, 480 }; 481 482 struct mm_walk_ops damon_young_ops = { 483 .pmd_entry = damon_young_pmd_entry, 484 .hugetlb_entry = damon_young_hugetlb_entry, 485 }; 486 487 damon_va_walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); 488 return arg.young; 489 } 490 491 /* 492 * Check whether the region was accessed after the last preparation 493 * 494 * mm 'mm_struct' for the given virtual address space 495 * r the region to be checked 496 */ 497 static void __damon_va_check_access(struct mm_struct *mm, 498 struct damon_region *r, bool same_target, 499 struct damon_attrs *attrs) 500 { 501 static unsigned long last_addr; 502 static unsigned long last_folio_sz = PAGE_SIZE; 503 static bool last_accessed; 504 505 if (!mm) { 506 damon_update_region_access_rate(r, false, attrs); 507 return; 508 } 509 510 /* If the region is in the last checked page, reuse the result */ 511 if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) == 512 ALIGN_DOWN(r->sampling_addr, last_folio_sz))) { 513 damon_update_region_access_rate(r, last_accessed, attrs); 514 return; 515 } 516 517 last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz); 518 damon_update_region_access_rate(r, last_accessed, attrs); 519 520 last_addr = r->sampling_addr; 521 } 522 523 static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) 524 { 525 struct damon_target *t; 526 struct mm_struct *mm; 527 struct damon_region *r; 528 unsigned int max_nr_accesses = 0; 529 bool same_target; 530 531 damon_for_each_target(t, ctx) { 532 mm = damon_get_mm(t); 533 same_target = false; 534 damon_for_each_region(r, t) { 535 __damon_va_check_access(mm, r, same_target, 536 &ctx->attrs); 537 max_nr_accesses = max(r->nr_accesses, max_nr_accesses); 538 same_target = true; 539 } 540 if (mm) 541 mmput(mm); 542 } 543 544 return max_nr_accesses; 545 } 546 547 static bool damos_va_filter_young_match(struct damos_filter *filter, 548 struct folio *folio, struct vm_area_struct *vma, 549 unsigned long addr, pte_t *ptep, pmd_t *pmdp) 550 { 551 bool young = false; 552 553 if (ptep) 554 young = pte_young(ptep_get(ptep)); 555 else if (pmdp) 556 young = pmd_young(pmdp_get(pmdp)); 557 558 young = young || !folio_test_idle(folio) || 559 mmu_notifier_test_young(vma->vm_mm, addr); 560 561 if (young && ptep) 562 damon_ptep_mkold(ptep, vma, addr); 563 else if (young && pmdp) 564 damon_pmdp_mkold(pmdp, vma, addr); 565 566 return young == filter->matching; 567 } 568 569 static bool damos_va_filter_out(struct damos *scheme, struct folio *folio, 570 struct vm_area_struct *vma, unsigned long addr, 571 pte_t *ptep, pmd_t *pmdp) 572 { 573 struct damos_filter *filter; 574 bool matched; 575 576 if (scheme->core_filters_allowed) 577 return false; 578 579 damos_for_each_ops_filter(filter, scheme) { 580 /* 581 * damos_folio_filter_match checks the young filter by doing an 582 * rmap on the folio to find its page table. However, being the 583 * vaddr scheme, we have direct access to the page tables, so 584 * use that instead. 585 */ 586 if (filter->type == DAMOS_FILTER_TYPE_YOUNG) 587 matched = damos_va_filter_young_match(filter, folio, 588 vma, addr, ptep, pmdp); 589 else 590 matched = damos_folio_filter_match(filter, folio); 591 592 if (matched) 593 return !filter->allow; 594 } 595 return scheme->ops_filters_default_reject; 596 } 597 598 struct damos_va_migrate_private { 599 struct list_head *migration_lists; 600 struct damos *scheme; 601 }; 602 603 /* 604 * Place the given folio in the migration_list corresponding to where the folio 605 * should be migrated. 606 * 607 * The algorithm used here is similar to weighted_interleave_nid() 608 */ 609 static void damos_va_migrate_dests_add(struct folio *folio, 610 struct vm_area_struct *vma, unsigned long addr, 611 struct damos_migrate_dests *dests, 612 struct list_head *migration_lists) 613 { 614 pgoff_t ilx; 615 int order; 616 unsigned int target; 617 unsigned int weight_total = 0; 618 int i; 619 620 /* 621 * If dests is empty, there is only one migration list corresponding 622 * to s->target_nid. 623 */ 624 if (!dests->nr_dests) { 625 i = 0; 626 goto isolate; 627 } 628 629 order = folio_order(folio); 630 ilx = vma->vm_pgoff >> order; 631 ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); 632 633 for (i = 0; i < dests->nr_dests; i++) 634 weight_total += dests->weight_arr[i]; 635 636 /* If the total weights are somehow 0, don't migrate at all */ 637 if (!weight_total) 638 return; 639 640 target = ilx % weight_total; 641 for (i = 0; i < dests->nr_dests; i++) { 642 if (target < dests->weight_arr[i]) 643 break; 644 target -= dests->weight_arr[i]; 645 } 646 647 /* If the folio is already in the right node, don't do anything */ 648 if (folio_nid(folio) == dests->node_id_arr[i]) 649 return; 650 651 isolate: 652 if (!folio_isolate_lru(folio)) 653 return; 654 655 list_add(&folio->lru, &migration_lists[i]); 656 } 657 658 static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr, 659 unsigned long next, struct mm_walk *walk) 660 { 661 struct damos_va_migrate_private *priv = walk->private; 662 struct list_head *migration_lists = priv->migration_lists; 663 struct damos *s = priv->scheme; 664 struct damos_migrate_dests *dests = &s->migrate_dests; 665 struct folio *folio; 666 spinlock_t *ptl; 667 pte_t *start_pte, *pte, ptent; 668 int nr; 669 670 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 671 ptl = pmd_trans_huge_lock(pmd, walk->vma); 672 if (ptl) { 673 pmd_t pmde = pmdp_get(pmd); 674 675 if (!pmd_present(pmde)) 676 goto huge_out; 677 folio = vm_normal_folio_pmd(walk->vma, addr, pmde); 678 if (!folio) 679 goto huge_out; 680 if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd)) 681 goto huge_out; 682 damos_va_migrate_dests_add(folio, walk->vma, addr, dests, 683 migration_lists); 684 huge_out: 685 spin_unlock(ptl); 686 return 0; 687 } 688 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 689 690 start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 691 if (!pte) 692 return 0; 693 694 for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) { 695 nr = 1; 696 ptent = ptep_get(pte); 697 698 if (pte_none(ptent) || !pte_present(ptent)) 699 continue; 700 folio = vm_normal_folio(walk->vma, addr, ptent); 701 if (!folio) 702 continue; 703 if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL)) 704 continue; 705 damos_va_migrate_dests_add(folio, walk->vma, addr, dests, 706 migration_lists); 707 nr = folio_nr_pages(folio); 708 } 709 pte_unmap_unlock(start_pte, ptl); 710 return 0; 711 } 712 713 /* 714 * Functions for the target validity check and cleanup 715 */ 716 717 static bool damon_va_target_valid(struct damon_target *t) 718 { 719 struct task_struct *task; 720 721 task = damon_get_task_struct(t); 722 if (task) { 723 put_task_struct(task); 724 return true; 725 } 726 727 return false; 728 } 729 730 static void damon_va_cleanup_target(struct damon_target *t) 731 { 732 put_pid(t->pid); 733 } 734 735 #ifndef CONFIG_ADVISE_SYSCALLS 736 static unsigned long damos_madvise(struct damon_target *target, 737 struct damon_region *r, int behavior) 738 { 739 return 0; 740 } 741 #else 742 static unsigned long damos_madvise(struct damon_target *target, 743 struct damon_region *r, int behavior) 744 { 745 struct mm_struct *mm; 746 unsigned long start = PAGE_ALIGN(r->ar.start); 747 unsigned long len = PAGE_ALIGN(damon_sz_region(r)); 748 unsigned long applied; 749 750 mm = damon_get_mm(target); 751 if (!mm) 752 return 0; 753 754 applied = do_madvise(mm, start, len, behavior) ? 0 : len; 755 mmput(mm); 756 757 return applied; 758 } 759 #endif /* CONFIG_ADVISE_SYSCALLS */ 760 761 static unsigned long damos_va_migrate(struct damon_target *target, 762 struct damon_region *r, struct damos *s, 763 unsigned long *sz_filter_passed) 764 { 765 LIST_HEAD(folio_list); 766 struct damos_va_migrate_private priv; 767 struct mm_struct *mm; 768 int nr_dests; 769 int nid; 770 bool use_target_nid; 771 unsigned long applied = 0; 772 struct damos_migrate_dests *dests = &s->migrate_dests; 773 struct mm_walk_ops walk_ops = { 774 .pmd_entry = damos_va_migrate_pmd_entry, 775 .pte_entry = NULL, 776 }; 777 778 use_target_nid = dests->nr_dests == 0; 779 nr_dests = use_target_nid ? 1 : dests->nr_dests; 780 priv.scheme = s; 781 priv.migration_lists = kmalloc_objs(*priv.migration_lists, nr_dests); 782 if (!priv.migration_lists) 783 return 0; 784 785 for (int i = 0; i < nr_dests; i++) 786 INIT_LIST_HEAD(&priv.migration_lists[i]); 787 788 789 mm = damon_get_mm(target); 790 if (!mm) 791 goto free_lists; 792 793 damon_va_walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv); 794 mmput(mm); 795 796 for (int i = 0; i < nr_dests; i++) { 797 nid = use_target_nid ? s->target_nid : dests->node_id_arr[i]; 798 applied += damon_migrate_pages(&priv.migration_lists[i], nid); 799 cond_resched(); 800 } 801 802 free_lists: 803 kfree(priv.migration_lists); 804 return applied * PAGE_SIZE; 805 } 806 807 struct damos_va_stat_private { 808 struct damos *scheme; 809 unsigned long *sz_filter_passed; 810 }; 811 812 static inline bool damos_va_invalid_folio(struct folio *folio, 813 struct damos *s) 814 { 815 return !folio || folio == s->last_applied; 816 } 817 818 static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr, 819 unsigned long next, struct mm_walk *walk) 820 { 821 struct damos_va_stat_private *priv = walk->private; 822 struct damos *s = priv->scheme; 823 unsigned long *sz_filter_passed = priv->sz_filter_passed; 824 struct vm_area_struct *vma = walk->vma; 825 struct folio *folio; 826 spinlock_t *ptl; 827 pte_t *start_pte, *pte, ptent; 828 int nr; 829 830 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 831 ptl = pmd_trans_huge_lock(pmd, vma); 832 if (ptl) { 833 pmd_t pmde = pmdp_get(pmd); 834 835 if (!pmd_present(pmde)) 836 goto huge_unlock; 837 838 folio = vm_normal_folio_pmd(vma, addr, pmde); 839 840 if (damos_va_invalid_folio(folio, s)) 841 goto huge_unlock; 842 843 if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd)) 844 *sz_filter_passed += folio_size(folio); 845 s->last_applied = folio; 846 847 huge_unlock: 848 spin_unlock(ptl); 849 return 0; 850 } 851 #endif 852 start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 853 if (!start_pte) 854 return 0; 855 856 for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) { 857 nr = 1; 858 ptent = ptep_get(pte); 859 860 if (pte_none(ptent) || !pte_present(ptent)) 861 continue; 862 863 folio = vm_normal_folio(vma, addr, ptent); 864 865 if (damos_va_invalid_folio(folio, s)) 866 continue; 867 868 if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL)) 869 *sz_filter_passed += folio_size(folio); 870 nr = folio_nr_pages(folio); 871 s->last_applied = folio; 872 } 873 pte_unmap_unlock(start_pte, ptl); 874 return 0; 875 } 876 877 static unsigned long damos_va_stat(struct damon_target *target, 878 struct damon_region *r, struct damos *s, 879 unsigned long *sz_filter_passed) 880 { 881 struct damos_va_stat_private priv; 882 struct mm_struct *mm; 883 struct mm_walk_ops walk_ops = { 884 .pmd_entry = damos_va_stat_pmd_entry, 885 }; 886 887 priv.scheme = s; 888 priv.sz_filter_passed = sz_filter_passed; 889 890 if (!damos_ops_has_filter(s)) 891 return 0; 892 893 mm = damon_get_mm(target); 894 if (!mm) 895 return 0; 896 897 damon_va_walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv); 898 mmput(mm); 899 return 0; 900 } 901 902 static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, 903 struct damon_target *t, struct damon_region *r, 904 struct damos *scheme, unsigned long *sz_filter_passed) 905 { 906 int madv_action; 907 908 switch (scheme->action) { 909 case DAMOS_WILLNEED: 910 madv_action = MADV_WILLNEED; 911 break; 912 case DAMOS_COLD: 913 madv_action = MADV_COLD; 914 break; 915 case DAMOS_PAGEOUT: 916 madv_action = MADV_PAGEOUT; 917 break; 918 case DAMOS_HUGEPAGE: 919 madv_action = MADV_HUGEPAGE; 920 break; 921 case DAMOS_NOHUGEPAGE: 922 madv_action = MADV_NOHUGEPAGE; 923 break; 924 case DAMOS_COLLAPSE: 925 madv_action = MADV_COLLAPSE; 926 break; 927 case DAMOS_MIGRATE_HOT: 928 case DAMOS_MIGRATE_COLD: 929 return damos_va_migrate(t, r, scheme, sz_filter_passed); 930 case DAMOS_STAT: 931 return damos_va_stat(t, r, scheme, sz_filter_passed); 932 default: 933 /* 934 * DAMOS actions that are not yet supported by 'vaddr'. 935 */ 936 return 0; 937 } 938 939 return damos_madvise(t, r, madv_action); 940 } 941 942 static int damon_va_scheme_score(struct damon_ctx *context, 943 struct damon_region *r, struct damos *scheme) 944 { 945 946 switch (scheme->action) { 947 case DAMOS_PAGEOUT: 948 return damon_cold_score(context, r, scheme); 949 case DAMOS_MIGRATE_HOT: 950 return damon_hot_score(context, r, scheme); 951 case DAMOS_MIGRATE_COLD: 952 return damon_cold_score(context, r, scheme); 953 default: 954 break; 955 } 956 957 return DAMOS_MAX_SCORE; 958 } 959 960 static int __init damon_va_initcall(void) 961 { 962 struct damon_operations ops = { 963 .id = DAMON_OPS_VADDR, 964 .init = damon_va_init, 965 .update = damon_va_update, 966 .prepare_access_checks = damon_va_prepare_access_checks, 967 .check_accesses = damon_va_check_accesses, 968 .target_valid = damon_va_target_valid, 969 .cleanup_target = damon_va_cleanup_target, 970 .apply_scheme = damon_va_apply_scheme, 971 .get_scheme_score = damon_va_scheme_score, 972 }; 973 /* ops for fixed virtual address ranges */ 974 struct damon_operations ops_fvaddr = ops; 975 int err; 976 977 /* Don't set the monitoring target regions for the entire mapping */ 978 ops_fvaddr.id = DAMON_OPS_FVADDR; 979 ops_fvaddr.init = NULL; 980 ops_fvaddr.update = NULL; 981 982 err = damon_register_ops(&ops); 983 if (err) 984 return err; 985 return damon_register_ops(&ops_fvaddr); 986 }; 987 988 subsys_initcall(damon_va_initcall); 989 990 #include "tests/vaddr-kunit.h" 991