// SPDX-License-Identifier: GPL-2.0
/*
 * DAMON Code for Virtual Address Spaces
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon-va: " fmt

#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>

#include "../internal.h"
#include "ops-common.h"

#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

/*
 * 't->pid' should be the pointer to the relevant 'struct pid' having reference
 * count.  Caller must put the returned task, unless it is NULL.
 */
static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
{
	return get_pid_task(t->pid, PIDTYPE_PID);
}

/*
 * Get the mm_struct of the given target
 *
 * Caller _must_ put the mm_struct after use, unless it is NULL.
 *
 * Returns the mm_struct of the target on success, NULL on failure
 */
static struct mm_struct *damon_get_mm(struct damon_target *t)
{
	struct task_struct *task;
	struct mm_struct *mm;

	task = damon_get_task_struct(t);
	if (!task)
		return NULL;

	mm = get_task_mm(task);
	put_task_struct(task);
	return mm;
}

/*
 * Functions for the initial monitoring target regions construction
 */

/*
 * Size-evenly split a region into 'nr_pieces' small regions
 *
 * Returns 0 on success, or negative error code otherwise.
 */
static int damon_va_evenly_split_region(struct damon_target *t,
		struct damon_region *r, unsigned int nr_pieces)
{
	unsigned long sz_orig, sz_piece, orig_end;
	struct damon_region *n = NULL, *next;
	unsigned long start;
	unsigned int i;

	if (!r || !nr_pieces)
		return -EINVAL;

	if (nr_pieces == 1)
		return 0;

	orig_end = r->ar.end;
	sz_orig = damon_sz_region(r);
	sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);

	if (!sz_piece)
		return -EINVAL;

	r->ar.end = r->ar.start + sz_piece;
	next = damon_next_region(r);
	for (start = r->ar.end, i = 1; i < nr_pieces; start += sz_piece, i++) {
		n = damon_new_region(start, start + sz_piece);
		if (!n)
			return -ENOMEM;
		damon_insert_region(n, r, next, t);
		r = n;
	}
	/* complement last region for possible rounding error */
	if (n)
		n->ar.end = orig_end;

	return 0;
}
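/*
 * Editorial illustration (not part of the original code), assuming the
 * default DAMON_MIN_REGION of PAGE_SIZE (4096 here): splitting [0, 40960)
 * into three pieces gives sz_piece = ALIGN_DOWN(40960 / 3, 4096) = 12288,
 * so the resulting regions are [0, 12288), [12288, 24576) and
 * [24576, 40960), with the last region absorbing the rounding remainder.
 * Splitting [0, 10240) into three pieces would give sz_piece = 0, and the
 * function returns -EINVAL.
 */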
static unsigned long sz_range(struct damon_addr_range *r)
{
	return r->end - r->start;
}

/*
 * Find three regions separated by two biggest unmapped regions
 *
 * mm		'mm_struct' of the target address space
 * regions	an array of three address ranges in which results will be saved
 *
 * This function receives an address space and finds three regions in it which
 * are separated by the two biggest unmapped regions in the space.  Please
 * refer to the comments of '__damon_va_init_regions()' below to know why this
 * is necessary.
 *
 * Returns 0 on success, or negative error code otherwise.
 */
static int __damon_va_three_regions(struct mm_struct *mm,
				    struct damon_addr_range regions[3])
{
	struct damon_addr_range first_gap = {0}, second_gap = {0};
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma, *prev = NULL;
	unsigned long start;

	/*
	 * Find the two biggest gaps so that first_gap > second_gap > others.
	 * If this is too slow, it can be optimised to examine the maple
	 * tree gaps.
	 */
	rcu_read_lock();
	for_each_vma(vmi, vma) {
		unsigned long gap;

		if (!prev) {
			start = vma->vm_start;
			goto next;
		}
		gap = vma->vm_start - prev->vm_end;

		if (gap > sz_range(&first_gap)) {
			second_gap = first_gap;
			first_gap.start = prev->vm_end;
			first_gap.end = vma->vm_start;
		} else if (gap > sz_range(&second_gap)) {
			second_gap.start = prev->vm_end;
			second_gap.end = vma->vm_start;
		}
next:
		prev = vma;
	}
	rcu_read_unlock();

	if (!sz_range(&second_gap) || !sz_range(&first_gap))
		return -EINVAL;

	/* Sort the two biggest gaps by address */
	if (first_gap.start > second_gap.start)
		swap(first_gap, second_gap);

	/* Store the result */
	regions[0].start = ALIGN(start, DAMON_MIN_REGION);
	regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
	regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
	regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
	regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
	regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);

	return 0;
}
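/*
 * Worked example (editorial illustration, not from the original code):
 * for VMAs covering [0x1000, 0x3000), [0x5000, 0x6000) and
 * [0x9000, 0xa000), the gaps are 0x2000 and 0x3000 bytes.  first_gap
 * therefore ends up as [0x6000, 0x9000) and second_gap as
 * [0x3000, 0x5000); after sorting the two gaps by address, the stored
 * regions are [0x1000, 0x3000), [0x5000, 0x6000) and [0x9000, 0xa000)
 * (assuming all boundaries are already DAMON_MIN_REGION aligned).
 */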
/*
 * Get the three regions in the given target (task)
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int damon_va_three_regions(struct damon_target *t,
				  struct damon_addr_range regions[3])
{
	struct mm_struct *mm;
	int rc;

	mm = damon_get_mm(t);
	if (!mm)
		return -EINVAL;

	mmap_read_lock(mm);
	rc = __damon_va_three_regions(mm, regions);
	mmap_read_unlock(mm);

	mmput(mm);
	return rc;
}

/*
 * Initialize the monitoring target regions for the given target (task)
 *
 * t	the given target
 *
 * Because only a small number of portions of the entire address space are
 * actually mapped to memory and accessed, monitoring the unmapped regions is
 * wasteful.  That said, because we can deal with small noise, tracking every
 * mapping is not strictly required, and could even incur a high overhead if
 * the mappings frequently change or the number of mappings is high.  The
 * adaptive regions adjustment mechanism will further help to deal with the
 * noise by simply identifying the unmapped areas as a region that has no
 * access.  Moreover, applying the real mappings, which would have many
 * unmapped areas inside, would make the adaptive mechanism quite complex.
 * That said, too large unmapped areas inside the monitoring target should be
 * removed so that they do not waste the time of the adaptive mechanism.
 *
 * For this reason, we convert the complex mappings to three distinct regions
 * that cover every mapped area of the address space.  Also, the two gaps
 * between the three regions are the two biggest unmapped areas in the given
 * address space.  In detail, this function first identifies the start and the
 * end of the mappings and the two biggest unmapped areas of the address
 * space.  Then, it constructs the three regions as below:
 *
 *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
 *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
 *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
 *
 * As the usual memory map of processes is as below, the gap between the heap
 * and the uppermost mmap()-ed region, and the gap between the lowermost
 * mmap()-ed region and the stack, will be the two biggest unmapped regions.
 * Because these gaps are exceptionally huge in a usual address space,
 * excluding these two biggest unmapped regions will be sufficient as a
 * trade-off.
 *
 *   <heap>
 *   <BIG UNMAPPED REGION 1>
 *   <uppermost mmap()-ed region>
 *   (other mmap()-ed regions and small unmapped regions)
 *   <lowermost mmap()-ed region>
 *   <BIG UNMAPPED REGION 2>
 *   <stack>
 */
static void __damon_va_init_regions(struct damon_ctx *ctx,
				    struct damon_target *t)
{
	struct damon_target *ti;
	struct damon_region *r;
	struct damon_addr_range regions[3];
	unsigned long sz = 0, nr_pieces;
	int i, tidx = 0;

	if (damon_va_three_regions(t, regions)) {
		damon_for_each_target(ti, ctx) {
			if (ti == t)
				break;
			tidx++;
		}
		pr_debug("Failed to get three regions of %dth target\n", tidx);
		return;
	}

	for (i = 0; i < 3; i++)
		sz += regions[i].end - regions[i].start;
	if (ctx->attrs.min_nr_regions)
		sz /= ctx->attrs.min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	/* Set the initial three regions of the target */
	for (i = 0; i < 3; i++) {
		r = damon_new_region(regions[i].start, regions[i].end);
		if (!r) {
			pr_err("%d'th init region creation failed\n", i);
			return;
		}
		damon_add_region(r, t);

		nr_pieces = (regions[i].end - regions[i].start) / sz;
		damon_va_evenly_split_region(t, r, nr_pieces);
	}
}

/* Initialize '->regions_list' of every target (task) */
static void damon_va_init(struct damon_ctx *ctx)
{
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
		/* the user may set the target regions as they want */
		if (!damon_nr_regions(t))
			__damon_va_init_regions(ctx, t);
	}
}

/*
 * Update regions for current memory mappings
 */
static void damon_va_update(struct damon_ctx *ctx)
{
	struct damon_addr_range three_regions[3];
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
		if (damon_va_three_regions(t, three_regions))
			continue;
		damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION);
	}
}

static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	pte_t *pte;
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmd, walk->vma);
	if (ptl) {
		pmd_t pmde = pmdp_get(pmd);

		if (pmd_present(pmde))
			damon_pmdp_mkold(pmd, walk->vma, addr);
		spin_unlock(ptl);
		return 0;
	}

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte)
		return 0;
	if (!pte_present(ptep_get(pte)))
		goto out;
	damon_ptep_mkold(pte, walk->vma, addr);
out:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
				struct vm_area_struct *vma, unsigned long addr)
{
	bool referenced = false;
	pte_t entry = huge_ptep_get(mm, addr, pte);
	struct folio *folio = pfn_folio(pte_pfn(entry));
	unsigned long psize = huge_page_size(hstate_vma(vma));

	folio_get(folio);

	if (pte_young(entry)) {
		referenced = true;
		entry = pte_mkold(entry);
		set_huge_pte_at(mm, addr, pte, entry, psize);
	}

	if (mmu_notifier_clear_young(mm, addr,
				     addr + huge_page_size(hstate_vma(vma))))
		referenced = true;

	if (referenced)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}

static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
				     unsigned long addr, unsigned long end,
				     struct mm_walk *walk)
{
	struct hstate *h = hstate_vma(walk->vma);
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(h, walk->mm, pte);
	entry = huge_ptep_get(walk->mm, addr, pte);
	if (!pte_present(entry))
		goto out;

	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);

out:
	spin_unlock(ptl);
	return 0;
}
#else
#define damon_mkold_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_mkold_ops = {
	.pmd_entry = damon_mkold_pmd_entry,
	.hugetlb_entry = damon_mkold_hugetlb_entry,
	.walk_lock = PGWALK_RDLOCK,
};

static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
{
	mmap_read_lock(mm);
	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
	mmap_read_unlock(mm);
}

/*
 * Functions for the access checking of the regions
 */

static void __damon_va_prepare_access_check(struct mm_struct *mm,
					    struct damon_region *r)
{
	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);

	damon_va_mkold(mm, r->sampling_addr);
}

static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct mm_struct *mm;
	struct damon_region *r;

	damon_for_each_target(t, ctx) {
		mm = damon_get_mm(t);
		if (!mm)
			continue;
		damon_for_each_region(r, t)
			__damon_va_prepare_access_check(mm, r);
		mmput(mm);
	}
}
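/*
 * Editorial note (not part of the original code): access checking is
 * two-phase.  damon_va_prepare_access_checks() above picks one random
 * sampling address per region and clears the accessed bit for the page
 * mapping it (the "mkold" walk).  After the sampling interval, the "young"
 * walk below re-examines the same address and treats a set accessed bit, a
 * cleared page-idle flag, or a positive mmu_notifier_test_young() result
 * as an access to the whole region.
 */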
struct damon_young_walk_private {
	/* size of the folio for the access checked virtual memory address */
	unsigned long *folio_sz;
	bool young;
};

static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	pte_t *pte;
	pte_t ptent;
	spinlock_t *ptl;
	struct folio *folio;
	struct damon_young_walk_private *priv = walk->private;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, walk->vma);
	if (ptl) {
		pmd_t pmde = pmdp_get(pmd);

		if (!pmd_present(pmde))
			goto huge_out;
		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
		if (!folio)
			goto huge_out;
		if (pmd_young(pmde) || !folio_test_idle(folio) ||
		    mmu_notifier_test_young(walk->mm, addr))
			priv->young = true;
		*priv->folio_sz = HPAGE_PMD_SIZE;
huge_out:
		spin_unlock(ptl);
		return 0;
	}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte)
		return 0;
	ptent = ptep_get(pte);
	if (!pte_present(ptent))
		goto out;
	folio = vm_normal_folio(walk->vma, addr, ptent);
	if (!folio)
		goto out;
	if (pte_young(ptent) || !folio_test_idle(folio) ||
	    mmu_notifier_test_young(walk->mm, addr))
		priv->young = true;
	*priv->folio_sz = folio_size(folio);
out:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
				     unsigned long addr, unsigned long end,
				     struct mm_walk *walk)
{
	struct damon_young_walk_private *priv = walk->private;
	struct hstate *h = hstate_vma(walk->vma);
	struct folio *folio;
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(h, walk->mm, pte);
	entry = huge_ptep_get(walk->mm, addr, pte);
	if (!pte_present(entry))
		goto out;

	folio = pfn_folio(pte_pfn(entry));
	folio_get(folio);

	if (pte_young(entry) || !folio_test_idle(folio) ||
	    mmu_notifier_test_young(walk->mm, addr))
		priv->young = true;
	*priv->folio_sz = huge_page_size(h);

	folio_put(folio);

out:
	spin_unlock(ptl);
	return 0;
}
#else
#define damon_young_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_young_ops = {
	.pmd_entry = damon_young_pmd_entry,
	.hugetlb_entry = damon_young_hugetlb_entry,
	.walk_lock = PGWALK_RDLOCK,
};

static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
		unsigned long *folio_sz)
{
	struct damon_young_walk_private arg = {
		.folio_sz = folio_sz,
		.young = false,
	};

	mmap_read_lock(mm);
	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
	mmap_read_unlock(mm);
	return arg.young;
}

/*
 * Check whether the region was accessed after the last preparation
 *
 * mm	'mm_struct' for the given virtual address space
 * r	the region to be checked
 */
static void __damon_va_check_access(struct mm_struct *mm,
				struct damon_region *r, bool same_target,
				struct damon_attrs *attrs)
{
	static unsigned long last_addr;
	static unsigned long last_folio_sz = PAGE_SIZE;
	static bool last_accessed;

	if (!mm) {
		damon_update_region_access_rate(r, false, attrs);
		return;
	}

	/* If the region is in the last checked page, reuse the result */
	if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
				ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
		damon_update_region_access_rate(r, last_accessed, attrs);
		return;
	}

	last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
	damon_update_region_access_rate(r, last_accessed, attrs);

	last_addr = r->sampling_addr;
}
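/*
 * Editorial illustration (hypothetical numbers): if two consecutive regions
 * of the same target picked sampling addresses 0x7f0000201000 and
 * 0x7f0000202000 and the first check found a 2 MiB folio, both addresses
 * align down to 0x7f0000200000, so the second region reuses last_accessed
 * above instead of walking the page table again.
 */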
static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct mm_struct *mm;
	struct damon_region *r;
	unsigned int max_nr_accesses = 0;
	bool same_target;

	damon_for_each_target(t, ctx) {
		mm = damon_get_mm(t);
		same_target = false;
		damon_for_each_region(r, t) {
			__damon_va_check_access(mm, r, same_target,
						&ctx->attrs);
			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
			same_target = true;
		}
		if (mm)
			mmput(mm);
	}

	return max_nr_accesses;
}

static bool damos_va_filter_young_match(struct damos_filter *filter,
		struct folio *folio, struct vm_area_struct *vma,
		unsigned long addr, pte_t *ptep, pmd_t *pmdp)
{
	bool young = false;

	if (ptep)
		young = pte_young(ptep_get(ptep));
	else if (pmdp)
		young = pmd_young(pmdp_get(pmdp));

	young = young || !folio_test_idle(folio) ||
		mmu_notifier_test_young(vma->vm_mm, addr);

	if (young && ptep)
		damon_ptep_mkold(ptep, vma, addr);
	else if (young && pmdp)
		damon_pmdp_mkold(pmdp, vma, addr);

	return young == filter->matching;
}

static bool damos_va_filter_out(struct damos *scheme, struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr,
		pte_t *ptep, pmd_t *pmdp)
{
	struct damos_filter *filter;
	bool matched;

	if (scheme->core_filters_allowed)
		return false;

	damos_for_each_ops_filter(filter, scheme) {
		/*
		 * damos_folio_filter_match checks the young filter by doing an
		 * rmap on the folio to find its page table.  However, being
		 * the vaddr scheme, we have direct access to the page tables,
		 * so use that instead.
		 */
		if (filter->type == DAMOS_FILTER_TYPE_YOUNG)
			matched = damos_va_filter_young_match(filter, folio,
					vma, addr, ptep, pmdp);
		else
			matched = damos_folio_filter_match(filter, folio);

		if (matched)
			return !filter->allow;
	}
	return scheme->ops_filters_default_reject;
}
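/*
 * Editorial illustration (hypothetical filter setup): with a single ops
 * filter of type DAMOS_FILTER_TYPE_YOUNG, matching == true and
 * allow == false, a young folio matches and damos_va_filter_out() returns
 * true (the folio is rejected), while an idle folio falls through to
 * scheme->ops_filters_default_reject.
 */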
struct damos_va_migrate_private {
	struct list_head *migration_lists;
	struct damos *scheme;
};

/*
 * Place the given folio in the migration_list corresponding to where the folio
 * should be migrated.
 *
 * The algorithm used here is similar to weighted_interleave_nid()
 */
static void damos_va_migrate_dests_add(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr,
		struct damos_migrate_dests *dests,
		struct list_head *migration_lists)
{
	pgoff_t ilx;
	int order;
	unsigned int target;
	unsigned int weight_total = 0;
	int i;

	/*
	 * If dests is empty, there is only one migration list corresponding
	 * to s->target_nid.
	 */
	if (!dests->nr_dests) {
		i = 0;
		goto isolate;
	}

	order = folio_order(folio);
	ilx = vma->vm_pgoff >> order;
	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);

	for (i = 0; i < dests->nr_dests; i++)
		weight_total += dests->weight_arr[i];

	/* If the total weights are somehow 0, don't migrate at all */
	if (!weight_total)
		return;

	target = ilx % weight_total;
	for (i = 0; i < dests->nr_dests; i++) {
		if (target < dests->weight_arr[i])
			break;
		target -= dests->weight_arr[i];
	}

	/* If the folio is already in the right node, don't do anything */
	if (folio_nid(folio) == dests->node_id_arr[i])
		return;

isolate:
	if (!folio_isolate_lru(folio))
		return;

	list_add(&folio->lru, &migration_lists[i]);
}
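/*
 * Worked example (editorial illustration, hypothetical weights): with two
 * destinations weighted {3, 1}, weight_total is 4.  A folio whose
 * interleave index is 6 gives target = 6 % 4 = 2 < 3, selecting the first
 * destination; an index of 7 gives target = 3, which skips the first
 * weight and selects the second destination, so folios spread roughly 3:1
 * across the two nodes.
 */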
static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	struct damos_va_migrate_private *priv = walk->private;
	struct list_head *migration_lists = priv->migration_lists;
	struct damos *s = priv->scheme;
	struct damos_migrate_dests *dests = &s->migrate_dests;
	struct folio *folio;
	spinlock_t *ptl;
	pte_t *start_pte, *pte, ptent;
	int nr;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, walk->vma);
	if (ptl) {
		pmd_t pmde = pmdp_get(pmd);

		if (!pmd_present(pmde))
			goto huge_out;
		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
		if (!folio)
			goto huge_out;
		if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd))
			goto huge_out;
		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
					   migration_lists);
huge_out:
		spin_unlock(ptl);
		return 0;
	}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte)
		return 0;

	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
		nr = 1;
		ptent = ptep_get(pte);

		if (pte_none(ptent) || !pte_present(ptent))
			continue;
		folio = vm_normal_folio(walk->vma, addr, ptent);
		if (!folio)
			continue;
		/*
		 * Skip filtered-out folios instead of returning, so that the
		 * PTE map and lock taken above are always released below.
		 */
		if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL))
			continue;
		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
					   migration_lists);
		nr = folio_nr_pages(folio);
	}
	pte_unmap_unlock(start_pte, ptl);
	return 0;
}

/*
 * Functions for the target validity check and cleanup
 */

static bool damon_va_target_valid(struct damon_target *t)
{
	struct task_struct *task;

	task = damon_get_task_struct(t);
	if (task) {
		put_task_struct(task);
		return true;
	}

	return false;
}

static void damon_va_cleanup_target(struct damon_target *t)
{
	put_pid(t->pid);
}

#ifndef CONFIG_ADVISE_SYSCALLS
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	return 0;
}
#else
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	struct mm_struct *mm;
	unsigned long start = PAGE_ALIGN(r->ar.start);
	unsigned long len = PAGE_ALIGN(damon_sz_region(r));
	unsigned long applied;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	applied = do_madvise(mm, start, len, behavior) ? 0 : len;
	mmput(mm);

	return applied;
}
#endif /* CONFIG_ADVISE_SYSCALLS */

static unsigned long damos_va_migrate(struct damon_target *target,
		struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	LIST_HEAD(folio_list);
	struct damos_va_migrate_private priv;
	struct mm_struct *mm;
	int nr_dests;
	int nid;
	bool use_target_nid;
	unsigned long applied = 0;
	struct damos_migrate_dests *dests = &s->migrate_dests;
	struct mm_walk_ops walk_ops = {
		.pmd_entry = damos_va_migrate_pmd_entry,
		.pte_entry = NULL,
		.walk_lock = PGWALK_RDLOCK,
	};

	use_target_nid = dests->nr_dests == 0;
	nr_dests = use_target_nid ? 1 : dests->nr_dests;
	priv.scheme = s;
	priv.migration_lists = kmalloc_array(nr_dests,
			sizeof(*priv.migration_lists), GFP_KERNEL);
	if (!priv.migration_lists)
		return 0;

	for (int i = 0; i < nr_dests; i++)
		INIT_LIST_HEAD(&priv.migration_lists[i]);

	mm = damon_get_mm(target);
	if (!mm)
		goto free_lists;

	mmap_read_lock(mm);
	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
	mmap_read_unlock(mm);
	mmput(mm);

	for (int i = 0; i < nr_dests; i++) {
		nid = use_target_nid ? s->target_nid : dests->node_id_arr[i];
		applied += damon_migrate_pages(&priv.migration_lists[i], nid);
		cond_resched();
	}

free_lists:
	kfree(priv.migration_lists);
	return applied * PAGE_SIZE;
}
struct damos_va_stat_private {
	struct damos *scheme;
	unsigned long *sz_filter_passed;
};

static inline bool damos_va_invalid_folio(struct folio *folio,
		struct damos *s)
{
	return !folio || folio == s->last_applied;
}

static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	struct damos_va_stat_private *priv = walk->private;
	struct damos *s = priv->scheme;
	unsigned long *sz_filter_passed = priv->sz_filter_passed;
	struct vm_area_struct *vma = walk->vma;
	struct folio *folio;
	spinlock_t *ptl;
	pte_t *start_pte, *pte, ptent;
	int nr;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		pmd_t pmde = pmdp_get(pmd);

		if (!pmd_present(pmde))
			goto huge_unlock;

		folio = vm_normal_folio_pmd(vma, addr, pmde);

		if (damos_va_invalid_folio(folio, s))
			goto huge_unlock;

		if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd))
			*sz_filter_passed += folio_size(folio);
		s->last_applied = folio;

huge_unlock:
		spin_unlock(ptl);
		return 0;
	}
#endif
	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (!start_pte)
		return 0;

	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
		nr = 1;
		ptent = ptep_get(pte);

		if (pte_none(ptent) || !pte_present(ptent))
			continue;

		folio = vm_normal_folio(vma, addr, ptent);

		if (damos_va_invalid_folio(folio, s))
			continue;

		if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL))
			*sz_filter_passed += folio_size(folio);
		nr = folio_nr_pages(folio);
		s->last_applied = folio;
	}
	pte_unmap_unlock(start_pte, ptl);
	return 0;
}

static unsigned long damos_va_stat(struct damon_target *target,
		struct damon_region *r, struct damos *s,
		unsigned long *sz_filter_passed)
{
	struct damos_va_stat_private priv;
	struct mm_struct *mm;
	struct mm_walk_ops walk_ops = {
		.pmd_entry = damos_va_stat_pmd_entry,
		.walk_lock = PGWALK_RDLOCK,
	};

	priv.scheme = s;
	priv.sz_filter_passed = sz_filter_passed;

	if (!damos_ops_has_filter(s))
		return 0;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	mmap_read_lock(mm);
	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
	mmap_read_unlock(mm);
	mmput(mm);
	return 0;
}
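/*
 * Editorial note (not part of the original code): DAMOS_STAT for vaddr
 * only accounts the bytes that pass the installed ops filters via
 * *sz_filter_passed and always reports 0 applied bytes.
 * damos_va_invalid_folio() skips a folio that equals s->last_applied, so
 * the folio this scheme last handled is not counted twice in a row.
 */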
static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme, unsigned long *sz_filter_passed)
{
	int madv_action;

	switch (scheme->action) {
	case DAMOS_WILLNEED:
		madv_action = MADV_WILLNEED;
		break;
	case DAMOS_COLD:
		madv_action = MADV_COLD;
		break;
	case DAMOS_PAGEOUT:
		madv_action = MADV_PAGEOUT;
		break;
	case DAMOS_HUGEPAGE:
		madv_action = MADV_HUGEPAGE;
		break;
	case DAMOS_NOHUGEPAGE:
		madv_action = MADV_NOHUGEPAGE;
		break;
	case DAMOS_MIGRATE_HOT:
	case DAMOS_MIGRATE_COLD:
		return damos_va_migrate(t, r, scheme, sz_filter_passed);
	case DAMOS_STAT:
		return damos_va_stat(t, r, scheme, sz_filter_passed);
	default:
		/*
		 * DAMOS actions that are not yet supported by 'vaddr'.
		 */
		return 0;
	}

	return damos_madvise(t, r, madv_action);
}

static int damon_va_scheme_score(struct damon_ctx *context,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme)
{

	switch (scheme->action) {
	case DAMOS_PAGEOUT:
		return damon_cold_score(context, r, scheme);
	case DAMOS_MIGRATE_HOT:
		return damon_hot_score(context, r, scheme);
	case DAMOS_MIGRATE_COLD:
		return damon_cold_score(context, r, scheme);
	default:
		break;
	}

	return DAMOS_MAX_SCORE;
}

static int __init damon_va_initcall(void)
{
	struct damon_operations ops = {
		.id = DAMON_OPS_VADDR,
		.init = damon_va_init,
		.update = damon_va_update,
		.prepare_access_checks = damon_va_prepare_access_checks,
		.check_accesses = damon_va_check_accesses,
		.target_valid = damon_va_target_valid,
		.cleanup_target = damon_va_cleanup_target,
		.cleanup = NULL,
		.apply_scheme = damon_va_apply_scheme,
		.get_scheme_score = damon_va_scheme_score,
	};
	/* ops for fixed virtual address ranges */
	struct damon_operations ops_fvaddr = ops;
	int err;

	/* Don't set the monitoring target regions for the entire mapping */
	ops_fvaddr.id = DAMON_OPS_FVADDR;
	ops_fvaddr.init = NULL;
	ops_fvaddr.update = NULL;

	err = damon_register_ops(&ops);
	if (err)
		return err;
	return damon_register_ops(&ops_fvaddr);
};

subsys_initcall(damon_va_initcall);

#include "tests/vaddr-kunit.h"