// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/string_choices.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
static bool running_exclusive_ctxs;

static DEFINE_MUTEX(damon_ops_lock);
static struct damon_operations damon_registered_ops[NR_DAMON_OPS];

static struct kmem_cache *damon_region_cache __ro_after_init;

/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
static bool __damon_is_registered_ops(enum damon_ops_id id)
{
	struct damon_operations empty_ops = {};

	if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
		return false;
	return true;
}

/**
 * damon_is_registered_ops() - Check if a given damon_operations is registered.
 * @id:	Id of the damon_operations to check if registered.
 *
 * Return: true if the ops is set, false otherwise.
 */
bool damon_is_registered_ops(enum damon_ops_id id)
{
	bool registered;

	if (id >= NR_DAMON_OPS)
		return false;
	mutex_lock(&damon_ops_lock);
	registered = __damon_is_registered_ops(id);
	mutex_unlock(&damon_ops_lock);
	return registered;
}

/**
 * damon_register_ops() - Register a monitoring operations set to DAMON.
 * @ops:	monitoring operations set to register.
 *
 * This function registers a monitoring operations set of valid &struct
 * damon_operations->id so that others can find and use them later.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_register_ops(struct damon_operations *ops)
{
	int err = 0;

	if (ops->id >= NR_DAMON_OPS)
		return -EINVAL;
	mutex_lock(&damon_ops_lock);
	/* Fail for already registered ops */
	if (__damon_is_registered_ops(ops->id)) {
		err = -EINVAL;
		goto out;
	}
	damon_registered_ops[ops->id] = *ops;
out:
	mutex_unlock(&damon_ops_lock);
	return err;
}
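
/*
 * Example: registering a monitoring operations set.  This is only an
 * illustrative sketch, not code from this file: my_init(), my_cleanup() and
 * my_target_valid() are caller-defined callbacks, and DAMON_OPS_VADDR is used
 * merely as a placeholder id (registration fails with -EINVAL if that slot is
 * already taken).
 *
 *	static struct damon_operations my_ops = {
 *		.id = DAMON_OPS_VADDR,
 *		.init = my_init,
 *		.target_valid = my_target_valid,
 *		.cleanup = my_cleanup,
 *	};
 *
 *	static int __init my_module_init(void)
 *	{
 *		return damon_register_ops(&my_ops);
 *	}
 */
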
/**
 * damon_select_ops() - Select a monitoring operations to use with the context.
 * @ctx:	monitoring context to use the operations.
 * @id:	id of the registered monitoring operations to select.
 *
 * This function finds the registered monitoring operations set of @id and
 * makes @ctx use it.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
{
	int err = 0;

	if (id >= NR_DAMON_OPS)
		return -EINVAL;

	mutex_lock(&damon_ops_lock);
	if (!__damon_is_registered_ops(id))
		err = -EINVAL;
	else
		ctx->ops = damon_registered_ops[id];
	mutex_unlock(&damon_ops_lock);
	return err;
}

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmem_cache_alloc(damon_region_cache, GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	region->nr_accesses_bp = 0;
	INIT_LIST_HEAD(&region->list);

	region->age = 0;
	region->last_nr_accesses = 0;

	return region;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kmem_cache_free(damon_region_cache, r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}

/*
 * Check whether a region is intersecting an address range
 *
 * Returns true if it is.
 */
static bool damon_intersect(struct damon_region *r,
		struct damon_addr_range *re)
{
	return !(r->ar.end <= re->start || re->end <= r->ar.start);
}

/*
 * Fill holes in regions with new regions.
 */
static int damon_fill_regions_holes(struct damon_region *first,
		struct damon_region *last, struct damon_target *t)
{
	struct damon_region *r = first;

	damon_for_each_region_from(r, t) {
		struct damon_region *next, *newr;

		if (r == last)
			break;
		next = damon_next_region(r);
		if (r->ar.end != next->ar.start) {
			newr = damon_new_region(r->ar.end, next->ar.start);
			if (!newr)
				return -ENOMEM;
			damon_insert_region(newr, r, next, t);
		}
	}
	return 0;
}

/*
 * damon_set_regions() - Set regions of a target for given address ranges.
 * @t:		the given target.
 * @ranges:	array of new monitoring target ranges.
 * @nr_ranges:	length of @ranges.
 *
 * This function adds new regions to, or modifies existing regions of a
 * monitoring target to fit in specific ranges.
 *
 * Return: 0 if success, or negative error code otherwise.
 */
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
		unsigned int nr_ranges)
{
	struct damon_region *r, *next;
	unsigned int i;
	int err;

	/* Remove regions which are not in the new ranges */
	damon_for_each_region_safe(r, next, t) {
		for (i = 0; i < nr_ranges; i++) {
			if (damon_intersect(r, &ranges[i]))
				break;
		}
		if (i == nr_ranges)
			damon_destroy_region(r, t);
	}

	r = damon_first_region(t);
	/* Add new regions or resize existing regions to fit in the ranges */
	for (i = 0; i < nr_ranges; i++) {
		struct damon_region *first = NULL, *last, *newr;
		struct damon_addr_range *range;

		range = &ranges[i];
		/* Get the first/last regions intersecting with the range */
		damon_for_each_region_from(r, t) {
			if (damon_intersect(r, range)) {
				if (!first)
					first = r;
				last = r;
			}
			if (r->ar.start >= range->end)
				break;
		}
		if (!first) {
			/* no region intersects with this range */
			newr = damon_new_region(
					ALIGN_DOWN(range->start,
						DAMON_MIN_REGION),
					ALIGN(range->end, DAMON_MIN_REGION));
			if (!newr)
				return -ENOMEM;
			damon_insert_region(newr, damon_prev_region(r), r, t);
		} else {
			/* resize intersecting regions to fit in this range */
			first->ar.start = ALIGN_DOWN(range->start,
					DAMON_MIN_REGION);
			last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);

			/* fill possible holes in the range */
			err = damon_fill_regions_holes(first, last, t);
			if (err)
				return err;
		}
	}
	return 0;
}

struct damos_filter *damos_new_filter(enum damos_filter_type type,
		bool matching, bool allow)
{
	struct damos_filter *filter;

	filter = kmalloc(sizeof(*filter), GFP_KERNEL);
	if (!filter)
		return NULL;
	filter->type = type;
	filter->matching = matching;
	filter->allow = allow;
	INIT_LIST_HEAD(&filter->list);
	return filter;
}

void damos_add_filter(struct damos *s, struct damos_filter *f)
{
	list_add_tail(&f->list, &s->filters);
}

static void damos_del_filter(struct damos_filter *f)
{
	list_del(&f->list);
}

static void damos_free_filter(struct damos_filter *f)
{
	kfree(f);
}

void damos_destroy_filter(struct damos_filter *f)
{
	damos_del_filter(f);
	damos_free_filter(f);
}

struct damos_quota_goal *damos_new_quota_goal(
		enum damos_quota_goal_metric metric,
		unsigned long target_value)
{
	struct damos_quota_goal *goal;

	goal = kmalloc(sizeof(*goal), GFP_KERNEL);
	if (!goal)
		return NULL;
	goal->metric = metric;
	goal->target_value = target_value;
	INIT_LIST_HEAD(&goal->list);
	return goal;
}

void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g)
{
	list_add_tail(&g->list, &q->goals);
}

static void damos_del_quota_goal(struct damos_quota_goal *g)
{
	list_del(&g->list);
}

static void damos_free_quota_goal(struct damos_quota_goal *g)
{
	kfree(g);
}

void damos_destroy_quota_goal(struct damos_quota_goal *g)
{
	damos_del_quota_goal(g);
	damos_free_quota_goal(g);
}
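
/*
 * Example: letting a scheme's quota auto-tune against a memory PSI target.
 * An illustrative sketch only (error handling elided); 'scheme' is assumed to
 * be a previously constructed &struct damos, and the 1000 us target value is
 * arbitrary.
 *
 *	struct damos_quota_goal *goal;
 *
 *	goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US, 1000);
 *	if (goal)
 *		damos_add_quota_goal(&scheme->quota, goal);
 */
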
/* initialize fields of @quota that normally API users wouldn't set */
static struct damos_quota *damos_quota_init(struct damos_quota *quota)
{
	quota->esz = 0;
	quota->total_charged_sz = 0;
	quota->total_charged_ns = 0;
	quota->charged_sz = 0;
	quota->charged_from = 0;
	quota->charge_target_from = NULL;
	quota->charge_addr_from = 0;
	quota->esz_bp = 0;
	return quota;
}

struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
			enum damos_action action,
			unsigned long apply_interval_us,
			struct damos_quota *quota,
			struct damos_watermarks *wmarks,
			int target_nid)
{
	struct damos *scheme;

	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
	if (!scheme)
		return NULL;
	scheme->pattern = *pattern;
	scheme->action = action;
	scheme->apply_interval_us = apply_interval_us;
	/*
	 * next_apply_sis will be set when kdamond starts.  While kdamond is
	 * running, it will also be updated when it is added to the DAMON
	 * context, or damon_attrs are updated.
	 */
	scheme->next_apply_sis = 0;
	scheme->walk_completed = false;
	INIT_LIST_HEAD(&scheme->filters);
	scheme->stat = (struct damos_stat){};
	INIT_LIST_HEAD(&scheme->list);

	scheme->quota = *(damos_quota_init(quota));
	/* quota.goals should be separately set by caller */
	INIT_LIST_HEAD(&scheme->quota.goals);

	scheme->wmarks = *wmarks;
	scheme->wmarks.activated = true;

	scheme->target_nid = target_nid;

	return scheme;
}
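
/*
 * Example use of the constructor above: a scheme that pages out regions not
 * accessed for a while.  This is only an illustrative sketch; the pattern
 * bounds, the DAMOS_PAGEOUT action, and the zero-filled (i.e., effectively
 * unlimited/unused) quota and watermarks are assumptions of the example, not
 * requirements of the API.
 *
 *	struct damos_access_pattern pattern = {
 *		.min_sz_region = PAGE_SIZE,
 *		.max_sz_region = ULONG_MAX,
 *		.min_nr_accesses = 0,
 *		.max_nr_accesses = 0,
 *		.min_age_region = 10,
 *		.max_age_region = UINT_MAX,
 *	};
 *	struct damos_quota quota = {};
 *	struct damos_watermarks wmarks = {};
 *	struct damos *scheme;
 *
 *	scheme = damon_new_scheme(&pattern, DAMOS_PAGEOUT, 0, &quota,
 *			&wmarks, NUMA_NO_NODE);
 */
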
static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx)
{
	unsigned long sample_interval = ctx->attrs.sample_interval ?
		ctx->attrs.sample_interval : 1;
	unsigned long apply_interval = s->apply_interval_us ?
		s->apply_interval_us : ctx->attrs.aggr_interval;

	s->next_apply_sis = ctx->passed_sample_intervals +
		apply_interval / sample_interval;
}

void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
	list_add_tail(&s->list, &ctx->schemes);
	damos_set_next_apply_sis(s, ctx);
}

static void damon_del_scheme(struct damos *s)
{
	list_del(&s->list);
}

static void damon_free_scheme(struct damos *s)
{
	kfree(s);
}

void damon_destroy_scheme(struct damos *s)
{
	struct damos_quota_goal *g, *g_next;
	struct damos_filter *f, *next;

	damos_for_each_quota_goal_safe(g, g_next, &s->quota)
		damos_destroy_quota_goal(g);

	damos_for_each_filter_safe(f, next, s)
		damos_destroy_filter(f);
	damon_del_scheme(s);
	damon_free_scheme(s);
}

/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_target *damon_new_target(void)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->pid = NULL;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);
	INIT_LIST_HEAD(&t->list);

	return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

bool damon_targets_empty(struct damon_ctx *ctx)
{
	return list_empty(&ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}

struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	init_completion(&ctx->kdamond_started);

	ctx->attrs.sample_interval = 5 * 1000;
	ctx->attrs.aggr_interval = 100 * 1000;
	ctx->attrs.ops_update_interval = 60 * 1000 * 1000;

	ctx->passed_sample_intervals = 0;
	/* These will be set from kdamond_init_intervals_sis() */
	ctx->next_aggregation_sis = 0;
	ctx->next_ops_update_sis = 0;

	mutex_init(&ctx->kdamond_lock);
	mutex_init(&ctx->call_control_lock);
	mutex_init(&ctx->walk_control_lock);

	ctx->attrs.min_nr_regions = 10;
	ctx->attrs.max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);
	INIT_LIST_HEAD(&ctx->schemes);

	return ctx;
}

static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->ops.cleanup) {
		ctx->ops.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	struct damos *s, *next_s;

	damon_destroy_targets(ctx);

	damon_for_each_scheme_safe(s, next_s, ctx)
		damon_destroy_scheme(s);

	kfree(ctx);
}

static unsigned int damon_age_for_new_attrs(unsigned int age,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
{
	return age * old_attrs->aggr_interval / new_attrs->aggr_interval;
}

/* convert access ratio in bp (per 10,000) to nr_accesses */
static unsigned int damon_accesses_bp_to_nr_accesses(
		unsigned int accesses_bp, struct damon_attrs *attrs)
{
	return accesses_bp * damon_max_nr_accesses(attrs) / 10000;
}

/*
 * Convert nr_accesses to access ratio in bp (per 10,000).
 *
 * Callers should ensure attrs.aggr_interval is not zero, like
 * damon_update_monitoring_results() does.  Otherwise, divide-by-zero would
 * happen.
 */
static unsigned int damon_nr_accesses_to_accesses_bp(
		unsigned int nr_accesses, struct damon_attrs *attrs)
{
	return nr_accesses * 10000 / damon_max_nr_accesses(attrs);
}

static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
{
	return damon_accesses_bp_to_nr_accesses(
			damon_nr_accesses_to_accesses_bp(
				nr_accesses, old_attrs),
			new_attrs);
}

static void damon_update_monitoring_result(struct damon_region *r,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
{
	r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses,
			old_attrs, new_attrs);
	r->nr_accesses_bp = r->nr_accesses * 10000;
	r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs);
}
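
/*
 * Worked example of the conversion above, assuming damon_max_nr_accesses() is
 * the number of sampling intervals per aggregation interval
 * (aggr_interval / sample_interval):
 *
 *   old attrs: sample_interval = 5000 us, aggr_interval = 100000 us
 *              -> at most 20 accesses per aggregation interval
 *   new attrs: sample_interval = 10000 us, aggr_interval = 100000 us
 *              -> at most 10 accesses per aggregation interval
 *
 *   nr_accesses = 10 under the old attrs
 *   -> 10 * 10000 / 20 = 5000 bp
 *   -> 5000 * 10 / 10000 = 5 under the new attrs
 */
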
/*
 * region->nr_accesses is the number of sampling intervals in the last
 * aggregation interval in which access to the region has been found, and
 * region->age is the number of aggregation intervals for which its access
 * pattern has been maintained.  Hence the real meaning of the two fields
 * depends on the current sampling interval and aggregation interval.  This
 * function updates ->nr_accesses and ->age of the given damon_ctx's regions
 * for new damon_attrs.
 */
static void damon_update_monitoring_results(struct damon_ctx *ctx,
		struct damon_attrs *new_attrs)
{
	struct damon_attrs *old_attrs = &ctx->attrs;
	struct damon_target *t;
	struct damon_region *r;

	/* if any interval is zero, simply forgive conversion */
	if (!old_attrs->sample_interval || !old_attrs->aggr_interval ||
			!new_attrs->sample_interval ||
			!new_attrs->aggr_interval)
		return;

	damon_for_each_target(t, ctx)
		damon_for_each_region(r, t)
			damon_update_monitoring_result(
					r, old_attrs, new_attrs);
}

/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @attrs:		monitoring attributes
 *
 * This function should be called while the kdamond is not running, or an
 * access check results aggregation is not ongoing (e.g., from
 * &struct damon_callback->after_aggregation or
 * &struct damon_callback->after_wmarks_check callbacks).
 *
 * Every time interval is in micro-seconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
{
	unsigned long sample_interval = attrs->sample_interval ?
		attrs->sample_interval : 1;
	struct damos *s;

	if (attrs->min_nr_regions < 3)
		return -EINVAL;
	if (attrs->min_nr_regions > attrs->max_nr_regions)
		return -EINVAL;
	if (attrs->sample_interval > attrs->aggr_interval)
		return -EINVAL;

	ctx->next_aggregation_sis = ctx->passed_sample_intervals +
		attrs->aggr_interval / sample_interval;
	ctx->next_ops_update_sis = ctx->passed_sample_intervals +
		attrs->ops_update_interval / sample_interval;

	damon_update_monitoring_results(ctx, attrs);
	ctx->attrs = *attrs;

	damon_for_each_scheme(s, ctx)
		damos_set_next_apply_sis(s, ctx);

	return 0;
}

/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx:	monitoring context
 * @schemes:	array of the schemes
 * @nr_schemes:	number of entries in @schemes
 *
 * This function should not be called while the kdamond of the context is
 * running.
 */
void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
			ssize_t nr_schemes)
{
	struct damos *s, *next;
	ssize_t i;

	damon_for_each_scheme_safe(s, next, ctx)
		damon_destroy_scheme(s);
	for (i = 0; i < nr_schemes; i++)
		damon_add_scheme(ctx, schemes[i]);
}
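
/*
 * Example use of damon_set_attrs() above: updating the monitoring intervals
 * of a context.  An illustrative sketch only; the interval values are
 * arbitrary and, for a running kdamond, the call is assumed to be made from
 * an update-safe context such as a DAMON callback.
 *
 *	struct damon_attrs attrs = {
 *		.sample_interval = 5000,		// 5 ms
 *		.aggr_interval = 100000,		// 100 ms
 *		.ops_update_interval = 60000000,	// 60 s
 *		.min_nr_regions = 10,
 *		.max_nr_regions = 1000,
 *	};
 *	int err = damon_set_attrs(ctx, &attrs);
 */
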
static struct damos_quota_goal *damos_nth_quota_goal(
		int n, struct damos_quota *q)
{
	struct damos_quota_goal *goal;
	int i = 0;

	damos_for_each_quota_goal(goal, q) {
		if (i++ == n)
			return goal;
	}
	return NULL;
}

static void damos_commit_quota_goal(
		struct damos_quota_goal *dst, struct damos_quota_goal *src)
{
	dst->metric = src->metric;
	dst->target_value = src->target_value;
	if (dst->metric == DAMOS_QUOTA_USER_INPUT)
		dst->current_value = src->current_value;
	/* keep last_psi_total as is, since it will be updated in next cycle */
}

/**
 * damos_commit_quota_goals() - Commit DAMOS quota goals to another quota.
 * @dst:	The commit destination DAMOS quota.
 * @src:	The commit source DAMOS quota.
 *
 * Copies user-specified parameters for quota goals from @src to @dst.  Users
 * should use this function for quota goals-level parameters update of running
 * DAMON contexts, instead of manual in-place updates.
 *
 * This function should be called from parameters-update safe context, like
 * DAMON callbacks.
 */
int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src)
{
	struct damos_quota_goal *dst_goal, *next, *src_goal, *new_goal;
	int i = 0, j = 0;

	damos_for_each_quota_goal_safe(dst_goal, next, dst) {
		src_goal = damos_nth_quota_goal(i++, src);
		if (src_goal)
			damos_commit_quota_goal(dst_goal, src_goal);
		else
			damos_destroy_quota_goal(dst_goal);
	}
	damos_for_each_quota_goal_safe(src_goal, next, src) {
		if (j++ < i)
			continue;
		new_goal = damos_new_quota_goal(
				src_goal->metric, src_goal->target_value);
		if (!new_goal)
			return -ENOMEM;
		damos_add_quota_goal(dst, new_goal);
	}
	return 0;
}

static int damos_commit_quota(struct damos_quota *dst, struct damos_quota *src)
{
	int err;

	dst->reset_interval = src->reset_interval;
	dst->ms = src->ms;
	dst->sz = src->sz;
	err = damos_commit_quota_goals(dst, src);
	if (err)
		return err;
	dst->weight_sz = src->weight_sz;
	dst->weight_nr_accesses = src->weight_nr_accesses;
	dst->weight_age = src->weight_age;
	return 0;
}

static struct damos_filter *damos_nth_filter(int n, struct damos *s)
{
	struct damos_filter *filter;
	int i = 0;

	damos_for_each_filter(filter, s) {
		if (i++ == n)
			return filter;
	}
	return NULL;
}

static void damos_commit_filter_arg(
		struct damos_filter *dst, struct damos_filter *src)
{
	switch (dst->type) {
	case DAMOS_FILTER_TYPE_MEMCG:
		dst->memcg_id = src->memcg_id;
		break;
	case DAMOS_FILTER_TYPE_ADDR:
		dst->addr_range = src->addr_range;
		break;
	case DAMOS_FILTER_TYPE_TARGET:
		dst->target_idx = src->target_idx;
		break;
	default:
		break;
	}
}

static void damos_commit_filter(
		struct damos_filter *dst, struct damos_filter *src)
{
	dst->type = src->type;
	dst->matching = src->matching;
	damos_commit_filter_arg(dst, src);
}

static int damos_commit_filters(struct damos *dst, struct damos *src)
{
	struct damos_filter *dst_filter, *next, *src_filter, *new_filter;
	int i = 0, j = 0;

	damos_for_each_filter_safe(dst_filter, next, dst) {
		src_filter = damos_nth_filter(i++, src);
		if (src_filter)
			damos_commit_filter(dst_filter, src_filter);
		else
			damos_destroy_filter(dst_filter);
	}

	damos_for_each_filter_safe(src_filter, next, src) {
		if (j++ < i)
			continue;

		new_filter = damos_new_filter(
				src_filter->type, src_filter->matching,
				src_filter->allow);
		if (!new_filter)
			return -ENOMEM;
		damos_commit_filter_arg(new_filter, src_filter);
		damos_add_filter(dst, new_filter);
	}
	return 0;
}

static struct damos *damon_nth_scheme(int n, struct damon_ctx *ctx)
{
	struct damos *s;
	int i = 0;

	damon_for_each_scheme(s, ctx) {
		if (i++ == n)
			return s;
	}
	return NULL;
}

static int damos_commit(struct damos *dst, struct damos *src)
{
	int err;

	dst->pattern = src->pattern;
	dst->action = src->action;
	dst->apply_interval_us = src->apply_interval_us;

	err = damos_commit_quota(&dst->quota, &src->quota);
	if (err)
		return err;

	dst->wmarks = src->wmarks;

	err = damos_commit_filters(dst, src);
	return err;
}

static int damon_commit_schemes(struct damon_ctx *dst, struct damon_ctx *src)
{
	struct damos *dst_scheme, *next, *src_scheme, *new_scheme;
	int i = 0, j = 0, err;

	damon_for_each_scheme_safe(dst_scheme, next, dst) {
		src_scheme = damon_nth_scheme(i++, src);
		if (src_scheme) {
			err = damos_commit(dst_scheme, src_scheme);
			if (err)
				return err;
		} else {
			damon_destroy_scheme(dst_scheme);
		}
	}

	damon_for_each_scheme_safe(src_scheme, next, src) {
		if (j++ < i)
			continue;
		new_scheme = damon_new_scheme(&src_scheme->pattern,
				src_scheme->action,
				src_scheme->apply_interval_us,
				&src_scheme->quota, &src_scheme->wmarks,
				NUMA_NO_NODE);
		if (!new_scheme)
			return -ENOMEM;
		err = damos_commit(new_scheme, src_scheme);
		if (err) {
			damon_destroy_scheme(new_scheme);
			return err;
		}
		damon_add_scheme(dst, new_scheme);
	}
	return 0;
}

static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx)
{
	struct damon_target *t;
	int i = 0;

	damon_for_each_target(t, ctx) {
		if (i++ == n)
			return t;
	}
	return NULL;
}

/*
 * The caller should ensure the regions of @src are
 * 1. valid (end >= start) and
 * 2. sorted by starting address.
 *
 * If @src has no region, @dst keeps current regions.
 */
static int damon_commit_target_regions(
		struct damon_target *dst, struct damon_target *src)
{
	struct damon_region *src_region;
	struct damon_addr_range *ranges;
	int i = 0, err;

	damon_for_each_region(src_region, src)
		i++;
	if (!i)
		return 0;

	ranges = kmalloc_array(i, sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN);
	if (!ranges)
		return -ENOMEM;
	i = 0;
	damon_for_each_region(src_region, src)
		ranges[i++] = src_region->ar;
	err = damon_set_regions(dst, ranges, i);
	kfree(ranges);
	return err;
}

static int damon_commit_target(
		struct damon_target *dst, bool dst_has_pid,
		struct damon_target *src, bool src_has_pid)
{
	int err;

	err = damon_commit_target_regions(dst, src);
	if (err)
		return err;
	if (dst_has_pid)
		put_pid(dst->pid);
	if (src_has_pid)
		get_pid(src->pid);
	dst->pid = src->pid;
	return 0;
}

static int damon_commit_targets(
		struct damon_ctx *dst, struct damon_ctx *src)
{
	struct damon_target *dst_target, *next, *src_target, *new_target;
	int i = 0, j = 0, err;

	damon_for_each_target_safe(dst_target, next, dst) {
		src_target = damon_nth_target(i++, src);
		if (src_target) {
			err = damon_commit_target(
					dst_target, damon_target_has_pid(dst),
					src_target, damon_target_has_pid(src));
			if (err)
				return err;
		} else {
			if (damon_target_has_pid(dst))
				put_pid(dst_target->pid);
			damon_destroy_target(dst_target);
		}
	}

	damon_for_each_target_safe(src_target, next, src) {
		if (j++ < i)
			continue;
		new_target = damon_new_target();
		if (!new_target)
			return -ENOMEM;
		err = damon_commit_target(new_target, false,
				src_target, damon_target_has_pid(src));
		if (err) {
			damon_destroy_target(new_target);
			return err;
		}
		damon_add_target(dst, new_target);
	}
	return 0;
}

/**
 * damon_commit_ctx() - Commit parameters of a DAMON context to another.
 * @dst:	The commit destination DAMON context.
 * @src:	The commit source DAMON context.
 *
 * This function copies user-specified parameters from @src to @dst and updates
 * the internal status and results accordingly.  Users should use this function
 * for context-level parameters update of a running context, instead of manual
 * in-place updates.
 *
 * This function should be called from a parameters-update safe context, like
 * DAMON callbacks.
 */
int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
{
	int err;

	err = damon_commit_schemes(dst, src);
	if (err)
		return err;
	err = damon_commit_targets(dst, src);
	if (err)
		return err;
	/*
	 * schemes and targets should be updated first, since
	 * 1. damon_set_attrs() updates monitoring results of targets and
	 *    next_apply_sis of schemes, and
	 * 2. ops update should be done after pid handling is done (target
	 *    committing requires putting pids).
	 */
	err = damon_set_attrs(dst, &src->attrs);
	if (err)
		return err;
	dst->ops = src->ops;

	return 0;
}
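
/*
 * Example: updating parameters of a running context via an online commit.
 * An illustrative sketch; 'running_ctx' is the context whose kdamond is
 * running, 'new_ctx' is a context carrying only the new parameters, and the
 * call is assumed to be made from a parameters-update safe context such as a
 * DAMON callback or a damon_call()-ed function.
 *
 *	int err = damon_commit_ctx(running_ctx, new_ctx);
 *
 *	if (err)
 *		pr_debug("online parameters update failed (%d)\n", err);
 */
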
/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += damon_sz_region(r);
	}

	if (ctx->attrs.min_nr_regions)
		sz /= ctx->attrs.min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		reinit_completion(&ctx->kdamond_started);
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		} else {
			wait_for_completion(&ctx->kdamond_started);
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}

/**
 * damon_start() - Starts the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 * @exclusive:	exclusiveness of this contexts group
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per context is created and run in parallel.  The
 * caller should handle synchronization between the threads by itself.  If
 * @exclusive is true and a group of threads created by another 'damon_start()'
 * call is currently running, this function does nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if ((exclusive && nr_running_ctxs) ||
			(!exclusive && running_exclusive_ctxs)) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	if (exclusive && nr_running_ctxs)
		running_exclusive_ctxs = true;
	mutex_unlock(&damon_lock);

	return err;
}

/*
 * __damon_stop() - Stops monitoring of a given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	struct task_struct *tsk;

	mutex_lock(&ctx->kdamond_lock);
	tsk = ctx->kdamond;
	if (tsk) {
		get_task_struct(tsk);
		mutex_unlock(&ctx->kdamond_lock);
		kthread_stop_put(tsk);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}

/**
 * damon_stop() - Stops the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			break;
	}
	return err;
}

static bool damon_is_running(struct damon_ctx *ctx)
{
	bool running;

	mutex_lock(&ctx->kdamond_lock);
	running = ctx->kdamond != NULL;
	mutex_unlock(&ctx->kdamond_lock);
	return running;
}

/**
 * damon_call() - Invoke a given function on DAMON worker thread (kdamond).
 * @ctx:	DAMON context to call the function for.
 * @control:	Control variable of the call request.
 *
 * Ask DAMON worker thread (kdamond) of @ctx to call a function with an
 * argument data, respectively passed via &damon_call_control->fn and
 * &damon_call_control->data of @control, and wait until the kdamond finishes
 * handling of the request.
 *
 * The kdamond executes the function with the argument in the main loop, just
 * after a sampling of the iteration is finished.  The function can hence
 * safely access the internal data of the &struct damon_ctx without additional
 * synchronization.  The return value of the function will be saved in
 * &damon_call_control->return_code.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
{
	init_completion(&control->completion);
	control->canceled = false;

	mutex_lock(&ctx->call_control_lock);
	if (ctx->call_control) {
		mutex_unlock(&ctx->call_control_lock);
		return -EBUSY;
	}
	ctx->call_control = control;
	mutex_unlock(&ctx->call_control_lock);
	if (!damon_is_running(ctx))
		return -EINVAL;
	wait_for_completion(&control->completion);
	if (control->canceled)
		return -ECANCELED;
	return 0;
}
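
/*
 * Example: starting a context and running a function on its kdamond.  An
 * illustrative sketch only; read_stats() and my_stats are caller-defined, and
 * error handling is minimal.
 *
 *	struct damon_call_control control = {
 *		.fn = read_stats,
 *		.data = &my_stats,
 *	};
 *	int err;
 *
 *	err = damon_start(&ctx, 1, true);
 *	if (err)
 *		return err;
 *	err = damon_call(ctx, &control);
 *	if (!err)
 *		err = control.return_code;
 */
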
/**
 * damos_walk() - Invoke a given function while DAMOS walks regions.
 * @ctx:	DAMON context to call the function for.
 * @control:	Control variable of the walk request.
 *
 * Ask DAMON worker thread (kdamond) of @ctx to call a function for each region
 * that the kdamond will apply DAMOS action to, and wait until the kdamond
 * finishes handling of the request.
 *
 * The kdamond executes the given function in the main loop, for each region
 * just after it applied any DAMOS actions of @ctx to it.  The invocation is
 * made only within one &damos->apply_interval_us since damos_walk()
 * invocation, for each scheme.  The given callback function can hence safely
 * access the internal data of &struct damon_ctx and &struct damon_region that
 * each of the schemes will apply the action to in the next interval, without
 * additional synchronizations against the kdamond.  If every scheme of @ctx
 * passed at least one &damos->apply_interval_us, kdamond marks the request as
 * completed so that damos_walk() can wake up and return.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control)
{
	init_completion(&control->completion);
	control->canceled = false;
	mutex_lock(&ctx->walk_control_lock);
	if (ctx->walk_control) {
		mutex_unlock(&ctx->walk_control_lock);
		return -EBUSY;
	}
	ctx->walk_control = control;
	mutex_unlock(&ctx->walk_control_lock);
	if (!damon_is_running(ctx))
		return -EINVAL;
	wait_for_completion(&control->completion);
	if (control->canceled)
		return -ECANCELED;
	return 0;
}
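
/*
 * Example: walking the regions that the schemes of a running context will
 * apply their actions to.  An illustrative sketch only; count_walk_fn() and
 * the 'total' counter are caller-defined, and the walk_fn signature shown is
 * assumed to mirror the invocation in damos_walk_call_walk() below.
 *
 *	static void count_walk_fn(void *data, struct damon_ctx *ctx,
 *			struct damon_target *t, struct damon_region *r,
 *			struct damos *s, unsigned long sz_filter_passed)
 *	{
 *		*(unsigned long *)data += damon_sz_region(r);
 *	}
 *
 *	unsigned long total = 0;
 *	struct damos_walk_control control = {
 *		.walk_fn = count_walk_fn,
 *		.data = &total,
 *	};
 *	int err = damos_walk(ctx, &control);
 */
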
/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;
	unsigned int ti = 0;	/* target's index */

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(ti, r, damon_nr_regions(t));
			r->last_nr_accesses = r->nr_accesses;
			r->nr_accesses = 0;
		}
		ti++;
	}
}

static void damon_split_region_at(struct damon_target *t,
		struct damon_region *r, unsigned long sz_r);

static bool __damos_valid_target(struct damon_region *r, struct damos *s)
{
	unsigned long sz;
	unsigned int nr_accesses = r->nr_accesses_bp / 10000;

	sz = damon_sz_region(r);
	return s->pattern.min_sz_region <= sz &&
		sz <= s->pattern.max_sz_region &&
		s->pattern.min_nr_accesses <= nr_accesses &&
		nr_accesses <= s->pattern.max_nr_accesses &&
		s->pattern.min_age_region <= r->age &&
		r->age <= s->pattern.max_age_region;
}

static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	bool ret = __damos_valid_target(r, s);

	if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
		return ret;

	return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}

/*
 * damos_skip_charged_region() - Check if the given region or starting part of
 * it is already charged for the DAMOS quota.
 * @t:	The target of the region.
 * @rp:	The pointer to the region.
 * @s:	The scheme to be applied.
 *
 * If the quota of a scheme has been exceeded in a quota charge window, the
 * scheme's action would be applied to only a part of the regions fulfilling
 * the target access pattern.  To avoid applying the scheme action only to
 * already applied regions, DAMON skips applying the scheme action to the
 * regions that were charged in the previous charge window.
 *
 * This function checks if a given region should be skipped or not for that
 * reason.  If only the starting part of the region has previously been
 * charged, this function splits the region into two so that the second one
 * covers the area that was not charged in the previous charge window, saves
 * the second region in *rp, and returns false, so that the caller can apply
 * the DAMON action to the second one.
 *
 * Return: true if the region should be entirely skipped, false otherwise.
 */
static bool damos_skip_charged_region(struct damon_target *t,
		struct damon_region **rp, struct damos *s)
{
	struct damon_region *r = *rp;
	struct damos_quota *quota = &s->quota;
	unsigned long sz_to_skip;

	/* Skip previously charged regions */
	if (quota->charge_target_from) {
		if (t != quota->charge_target_from)
			return true;
		if (r == damon_last_region(t)) {
			quota->charge_target_from = NULL;
			quota->charge_addr_from = 0;
			return true;
		}
		if (quota->charge_addr_from &&
				r->ar.end <= quota->charge_addr_from)
			return true;

		if (quota->charge_addr_from && r->ar.start <
				quota->charge_addr_from) {
			sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
					r->ar.start, DAMON_MIN_REGION);
			if (!sz_to_skip) {
				if (damon_sz_region(r) <= DAMON_MIN_REGION)
					return true;
				sz_to_skip = DAMON_MIN_REGION;
			}
			damon_split_region_at(t, r, sz_to_skip);
			r = damon_next_region(r);
			*rp = r;
		}
		quota->charge_target_from = NULL;
		quota->charge_addr_from = 0;
	}
	return false;
}

static void damos_update_stat(struct damos *s,
		unsigned long sz_tried, unsigned long sz_applied,
		unsigned long sz_ops_filter_passed)
{
	s->stat.nr_tried++;
	s->stat.sz_tried += sz_tried;
	if (sz_applied)
		s->stat.nr_applied++;
	s->stat.sz_applied += sz_applied;
	s->stat.sz_ops_filter_passed += sz_ops_filter_passed;
}

static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t,
		struct damon_region *r, struct damos_filter *filter)
{
	bool matched = false;
	struct damon_target *ti;
	int target_idx = 0;
	unsigned long start, end;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_TARGET:
		damon_for_each_target(ti, ctx) {
			if (ti == t)
				break;
			target_idx++;
		}
		matched = target_idx == filter->target_idx;
		break;
	case DAMOS_FILTER_TYPE_ADDR:
		start = ALIGN_DOWN(filter->addr_range.start, DAMON_MIN_REGION);
		end = ALIGN_DOWN(filter->addr_range.end, DAMON_MIN_REGION);

		/* inside the range */
		if (start <= r->ar.start && r->ar.end <= end) {
			matched = true;
			break;
		}
		/* outside of the range */
		if (r->ar.end <= start || end <= r->ar.start) {
			matched = false;
			break;
		}
		/* start before the range and overlap */
		if (r->ar.start < start) {
			damon_split_region_at(t, r, start - r->ar.start);
			matched = false;
			break;
		}
		/* start inside the range */
		damon_split_region_at(t, r, end - r->ar.start);
		matched = true;
		break;
	default:
		return false;
	}

	return matched == filter->matching;
}

static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	struct damos_filter *filter;

	s->core_filters_allowed = false;
	damos_for_each_filter(filter, s) {
		if (damos_filter_match(ctx, t, r, filter)) {
			if (filter->allow)
				s->core_filters_allowed = true;
			return !filter->allow;
		}
	}
	return false;
}

/*
 * damos_walk_call_walk() - Call &damos_walk_control->walk_fn.
 * @ctx:	The context of &damon_ctx->walk_control.
 * @t:	The monitoring target of @r that @s will be applied to.
 * @r:	The region of @t that @s will be applied to.
 * @s:	The scheme of @ctx that will be applied to @r.
 *
 * This function is called from kdamond whenever it has asked the operation
 * set to apply a DAMOS scheme action to a region.  If a DAMOS walk request is
 * installed by damos_walk() and not yet uninstalled, invoke it.
 */
static void damos_walk_call_walk(struct damon_ctx *ctx, struct damon_target *t,
		struct damon_region *r, struct damos *s,
		unsigned long sz_filter_passed)
{
	struct damos_walk_control *control;

	mutex_lock(&ctx->walk_control_lock);
	control = ctx->walk_control;
	mutex_unlock(&ctx->walk_control_lock);
	if (!control)
		return;
	control->walk_fn(control->data, ctx, t, r, s, sz_filter_passed);
}

/*
 * damos_walk_complete() - Complete DAMOS walk request if all walks are done.
 * @ctx:	The context of &damon_ctx->walk_control.
 * @s:	A scheme of @ctx for which all walks are now done.
 *
 * This function is called when kdamond finished applying the action of a
 * DAMOS scheme to all regions that are eligible for the given
 * &damos->apply_interval_us.  If every scheme of @ctx including @s has now
 * finished walking for at least one &damos->apply_interval_us, this function
 * marks the handling of the given DAMOS walk request as done, so that
 * damos_walk() can wake up and return.
 */
static void damos_walk_complete(struct damon_ctx *ctx, struct damos *s)
{
	struct damos *siter;
	struct damos_walk_control *control;

	mutex_lock(&ctx->walk_control_lock);
	control = ctx->walk_control;
	mutex_unlock(&ctx->walk_control_lock);
	if (!control)
		return;

	s->walk_completed = true;
	/* if all schemes completed, signal completion to walker */
	damon_for_each_scheme(siter, ctx) {
		if (!siter->walk_completed)
			return;
	}
	complete(&control->completion);
	mutex_lock(&ctx->walk_control_lock);
	ctx->walk_control = NULL;
	mutex_unlock(&ctx->walk_control_lock);
}

/*
 * damos_walk_cancel() - Cancel the current DAMOS walk request.
 * @ctx:	The context of &damon_ctx->walk_control.
 *
 * This function is called when @ctx is deactivated by DAMOS watermarks, a
 * DAMOS walk is requested but there is no DAMOS scheme to walk for, or the
 * kdamond is already out of the main loop and therefore going to be
 * terminated, and hence cannot continue the walks.  This function therefore
 * marks the walk request as canceled, so that damos_walk() can wake up and
 * return.
 */
static void damos_walk_cancel(struct damon_ctx *ctx)
{
	struct damos_walk_control *control;

	mutex_lock(&ctx->walk_control_lock);
	control = ctx->walk_control;
	mutex_unlock(&ctx->walk_control_lock);

	if (!control)
		return;
	control->canceled = true;
	complete(&control->completion);
	mutex_lock(&ctx->walk_control_lock);
	ctx->walk_control = NULL;
	mutex_unlock(&ctx->walk_control_lock);
}

static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	struct damos_quota *quota = &s->quota;
	unsigned long sz = damon_sz_region(r);
	struct timespec64 begin, end;
	unsigned long sz_applied = 0;
	unsigned long sz_ops_filter_passed = 0;
	int err = 0;
	/*
	 * We plan to support multiple contexts per kdamond, as DAMON sysfs
	 * implies with the 'nr_contexts' file.  Nevertheless, only a single
	 * context per kdamond is supported for now.  So, we can simply use
	 * '0' as the context index here.
	 */
	unsigned int cidx = 0;
	struct damos *siter;		/* schemes iterator */
	unsigned int sidx = 0;
	struct damon_target *titer;	/* targets iterator */
	unsigned int tidx = 0;
	bool do_trace = false;

	/* get indices for trace_damos_before_apply() */
	if (trace_damos_before_apply_enabled()) {
		damon_for_each_scheme(siter, c) {
			if (siter == s)
				break;
			sidx++;
		}
		damon_for_each_target(titer, c) {
			if (titer == t)
				break;
			tidx++;
		}
		do_trace = true;
	}

	if (c->ops.apply_scheme) {
		if (quota->esz && quota->charged_sz + sz > quota->esz) {
			sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
					DAMON_MIN_REGION);
			if (!sz)
				goto update_stat;
			damon_split_region_at(t, r, sz);
		}
		if (damos_filter_out(c, t, r, s))
			return;
		ktime_get_coarse_ts64(&begin);
		if (c->callback.before_damos_apply)
			err = c->callback.before_damos_apply(c, t, r, s);
		if (!err) {
			trace_damos_before_apply(cidx, sidx, tidx, r,
					damon_nr_regions(t), do_trace);
			sz_applied = c->ops.apply_scheme(c, t, r, s,
					&sz_ops_filter_passed);
		}
		damos_walk_call_walk(c, t, r, s, sz_ops_filter_passed);
		ktime_get_coarse_ts64(&end);
		quota->total_charged_ns += timespec64_to_ns(&end) -
			timespec64_to_ns(&begin);
		quota->charged_sz += sz;
		if (quota->esz && quota->charged_sz >= quota->esz) {
			quota->charge_target_from = t;
			quota->charge_addr_from = r->ar.end + 1;
		}
	}
	if (s->action != DAMOS_STAT)
		r->age = 0;

update_stat:
	damos_update_stat(s, sz, sz_applied, sz_ops_filter_passed);
}

static void damon_do_apply_schemes(struct damon_ctx *c,
				   struct damon_target *t,
				   struct damon_region *r)
{
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;

		if (c->passed_sample_intervals < s->next_apply_sis)
			continue;

		if (!s->wmarks.activated)
			continue;

		/* Check the quota */
		if (quota->esz && quota->charged_sz >= quota->esz)
			continue;

		if (damos_skip_charged_region(t, &r, s))
			continue;

		if (!damos_valid_target(c, t, r, s))
			continue;

		damos_apply_scheme(c, t, r, s);
	}
}

/*
 * damon_feed_loop_next_input() - get next input to achieve a target score.
 * @last_input:	The last input.
 * @score:	Current score that was made with @last_input.
 *
 * Calculate the next input to achieve the target score, based on the last
 * input and the current score.  Assuming the input and the score are
 * positively proportional, calculate how much compensation should be added to
 * or subtracted from the last input as a proportion of the last input.  To
 * avoid the next input becoming zero, always set it to a non-zero value.  In
 * short form (assuming support of float and signed calculations), the
 * algorithm is as below.
 *
 * next_input = max(last_input * ((goal - current) / goal + 1), 1)
 *
 * For simple implementation, we assume the target score is always 10,000.
 * The caller should adjust @score for this.
 *
 * Returns the next input assumed to achieve the target score.
 */
static unsigned long damon_feed_loop_next_input(unsigned long last_input,
		unsigned long score)
{
	const unsigned long goal = 10000;
	/* Set minimum input as 10000 to avoid compensation being zero */
	const unsigned long min_input = 10000;
	unsigned long score_goal_diff, compensation;
	bool over_achieving = score > goal;

	if (score == goal)
		return last_input;
	if (score >= goal * 2)
		return min_input;

	if (over_achieving)
		score_goal_diff = score - goal;
	else
		score_goal_diff = goal - score;

	if (last_input < ULONG_MAX / score_goal_diff)
		compensation = last_input * score_goal_diff / goal;
	else
		compensation = last_input / goal * score_goal_diff;

	if (over_achieving)
		return max(last_input - compensation, min_input);
	if (last_input < ULONG_MAX - compensation)
		return last_input + compensation;
	return ULONG_MAX;
}
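
/*
 * Worked example of the feedback loop above: with the target score fixed at
 * 10,000, a current score of 7,500 (under-achieving by 25%) gives a
 * compensation of last_input * 2,500 / 10,000, so the next input becomes 1.25
 * times the last input.  Conversely, a score of 12,500 shrinks the next input
 * to 0.75 times the last input (bounded below by the minimum input), and a
 * score at or above 20,000 clamps the next input to the minimum input.
 */
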
#ifdef CONFIG_PSI

static u64 damos_get_some_mem_psi_total(void)
{
	if (static_branch_likely(&psi_disabled))
		return 0;
	return div_u64(psi_system.total[PSI_AVGS][PSI_MEM * 2],
			NSEC_PER_USEC);
}

#else	/* CONFIG_PSI */

static inline u64 damos_get_some_mem_psi_total(void)
{
	return 0;
};

#endif	/* CONFIG_PSI */

static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal)
{
	u64 now_psi_total;

	switch (goal->metric) {
	case DAMOS_QUOTA_USER_INPUT:
		/* User should already set goal->current_value */
		break;
	case DAMOS_QUOTA_SOME_MEM_PSI_US:
		now_psi_total = damos_get_some_mem_psi_total();
		goal->current_value = now_psi_total - goal->last_psi_total;
		goal->last_psi_total = now_psi_total;
		break;
	default:
		break;
	}
}

/* Return the highest score since it makes schemes least aggressive */
static unsigned long damos_quota_score(struct damos_quota *quota)
{
	struct damos_quota_goal *goal;
	unsigned long highest_score = 0;

	damos_for_each_quota_goal(goal, quota) {
		damos_set_quota_goal_current_value(goal);
		highest_score = max(highest_score,
				goal->current_value * 10000 /
				goal->target_value);
	}

	return highest_score;
}

/*
 * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty
 */
static void damos_set_effective_quota(struct damos_quota *quota)
{
	unsigned long throughput;
	unsigned long esz = ULONG_MAX;

	if (!quota->ms && list_empty(&quota->goals)) {
		quota->esz = quota->sz;
		return;
	}

	if (!list_empty(&quota->goals)) {
		unsigned long score = damos_quota_score(quota);

		quota->esz_bp = damon_feed_loop_next_input(
				max(quota->esz_bp, 10000UL),
				score);
		esz = quota->esz_bp / 10000;
	}

	if (quota->ms) {
		if (quota->total_charged_ns)
			throughput = quota->total_charged_sz * 1000000 /
				quota->total_charged_ns;
		else
			throughput = PAGE_SIZE * 1024;
		esz = min(throughput * quota->ms, esz);
	}

	if (quota->sz && quota->sz < esz)
		esz = quota->sz;

	quota->esz = esz;
}

static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
{
	struct damos_quota *quota = &s->quota;
	struct damon_target *t;
	struct damon_region *r;
	unsigned long cumulated_sz;
	unsigned int score, max_score = 0;

	if (!quota->ms && !quota->sz && list_empty(&quota->goals))
		return;

	/* New charge window starts */
	if (time_after_eq(jiffies, quota->charged_from +
				msecs_to_jiffies(quota->reset_interval))) {
		if (quota->esz && quota->charged_sz >= quota->esz)
			s->stat.qt_exceeds++;
		quota->total_charged_sz += quota->charged_sz;
		quota->charged_from = jiffies;
		quota->charged_sz = 0;
		damos_set_effective_quota(quota);
	}

	if (!c->ops.get_scheme_score)
		return;

	/* Fill up the score histogram */
	memset(c->regions_score_histogram, 0,
			sizeof(*c->regions_score_histogram) *
			(DAMOS_MAX_SCORE + 1));
	damon_for_each_target(t, c) {
		damon_for_each_region(r, t) {
			if (!__damos_valid_target(r, s))
				continue;
			score = c->ops.get_scheme_score(c, t, r, s);
			c->regions_score_histogram[score] +=
				damon_sz_region(r);
			if (score > max_score)
				max_score = score;
		}
	}

	/* Set the min score limit */
	for (cumulated_sz = 0, score = max_score; ; score--) {
		cumulated_sz += c->regions_score_histogram[score];
		if (cumulated_sz >= quota->esz || !score)
			break;
	}
	quota->min_score = score;
}

static void kdamond_apply_schemes(struct damon_ctx *c)
{
	struct damon_target *t;
	struct damon_region *r, *next_r;
	struct damos *s;
	unsigned long sample_interval = c->attrs.sample_interval ?
		c->attrs.sample_interval : 1;
	bool has_schemes_to_apply = false;

	damon_for_each_scheme(s, c) {
		if (c->passed_sample_intervals < s->next_apply_sis)
			continue;

		if (!s->wmarks.activated)
			continue;

		has_schemes_to_apply = true;

		damos_adjust_quota(c, s);
	}

	if (!has_schemes_to_apply)
		return;

	damon_for_each_target(t, c) {
		damon_for_each_region_safe(r, next_r, t)
			damon_do_apply_schemes(c, t, r);
	}

	damon_for_each_scheme(s, c) {
		if (c->passed_sample_intervals < s->next_apply_sis)
			continue;
		damos_walk_complete(c, s);
		s->next_apply_sis = c->passed_sample_intervals +
			(s->apply_interval_us ? s->apply_interval_us :
			 c->attrs.aggr_interval) / sample_interval;
	}
}

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = damon_sz_region(l), sz_r = damon_sz_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->nr_accesses_bp = l->nr_accesses * 10000;
	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
		unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
			r->age = 0;
		else
			r->age++;

		if (prev && prev->ar.end == r->ar.start &&
		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
		    damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and whose
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 *
 * The total number of regions could be higher than the user-defined limit,
 * max_nr_regions, for some cases.  For example, the user can update
 * max_nr_regions to a number lower than the current number of regions while
 * DAMON is running.  For such a case, repeat merging until the limit is met
 * while increasing @threshold up to the possible maximum level.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
		unsigned long sz_limit)
{
	struct damon_target *t;
	unsigned int nr_regions;
	unsigned int max_thres;

	max_thres = c->attrs.aggr_interval /
		(c->attrs.sample_interval ? c->attrs.sample_interval : 1);
	do {
		nr_regions = 0;
		damon_for_each_target(t, c) {
			damon_merge_regions_of(t, threshold, sz_limit);
			nr_regions += damon_nr_regions(t);
		}
		threshold = max(1, threshold * 2);
	} while (nr_regions > c->attrs.max_nr_regions &&
			threshold / 2 < max_thres);
}

/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_target *t,
		struct damon_region *r, unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	new->age = r->age;
	new->last_nr_accesses = r->last_nr_accesses;
	new->nr_accesses_bp = r->nr_accesses_bp;
	new->nr_accesses = r->nr_accesses;

	damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = damon_sz_region(r);

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10% and at most 90% of the original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of the regions is equal to or smaller than half of
 * the user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->attrs.max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->attrs.max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(t, nr_subregions);

	last_nr_regions = nr_regions;
}

/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if need to stop current monitoring.
/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if the current monitoring should be stopped.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;

	if (kthread_should_stop())
		return true;

	if (!ctx->ops.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->ops.target_valid(t))
			return false;
	}

	return true;
}

static int damos_get_wmark_metric_value(enum damos_wmark_metric metric,
		unsigned long *metric_value)
{
	switch (metric) {
	case DAMOS_WMARK_FREE_MEM_RATE:
		*metric_value = global_zone_page_state(NR_FREE_PAGES) * 1000 /
			totalram_pages();
		return 0;
	default:
		break;
	}
	return -EINVAL;
}

/*
 * Returns zero if the scheme is active.  Else, returns the time to wait for
 * the next watermark check, in microseconds.
 */
static unsigned long damos_wmark_wait_us(struct damos *scheme)
{
	unsigned long metric;

	if (damos_get_wmark_metric_value(scheme->wmarks.metric, &metric))
		return 0;

	/* higher than high watermark or lower than low watermark */
	if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
		if (scheme->wmarks.activated)
			pr_debug("deactivate a scheme (%d) for %s wmark\n",
					scheme->action,
					str_high_low(metric > scheme->wmarks.high));
		scheme->wmarks.activated = false;
		return scheme->wmarks.interval;
	}

	/* inactive and higher than middle watermark */
	if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
			!scheme->wmarks.activated)
		return scheme->wmarks.interval;

	if (!scheme->wmarks.activated)
		pr_debug("activate a scheme (%d)\n", scheme->action);
	scheme->wmarks.activated = true;
	return 0;
}
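/*
 * Illustrative example (hypothetical watermarks): with
 * DAMOS_WMARK_FREE_MEM_RATE and high/mid/low set to 500/400/200 per-thousand,
 * a measured free memory rate of 600 deactivates the scheme, 450 keeps an
 * already inactive scheme inactive, 300 activates it, and 100 deactivates it
 * again.  Whenever the scheme stays or becomes inactive,
 * damos_wmark_wait_us() returns wmarks.interval as the number of
 * microseconds to wait before the next check.
 */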
static void kdamond_usleep(unsigned long usecs)
{
	if (usecs >= USLEEP_RANGE_UPPER_BOUND)
		schedule_timeout_idle(usecs_to_jiffies(usecs));
	else
		usleep_range_idle(usecs, usecs + 1);
}

/*
 * kdamond_call() - handle damon_call_control.
 * @ctx:	The &struct damon_ctx of the kdamond.
 * @cancel:	Whether to cancel the invocation of the function.
 *
 * If there is a &struct damon_call_control request that was registered via
 * &damon_call() on @ctx, do or cancel the invocation of the function depending
 * on @cancel.  @cancel is set when the kdamond is deactivated by DAMOS
 * watermarks, or the kdamond is already out of the main loop and therefore
 * will be terminated.
 */
static void kdamond_call(struct damon_ctx *ctx, bool cancel)
{
	struct damon_call_control *control;
	int ret = 0;

	mutex_lock(&ctx->call_control_lock);
	control = ctx->call_control;
	mutex_unlock(&ctx->call_control_lock);
	if (!control)
		return;
	if (cancel) {
		control->canceled = true;
	} else {
		ret = control->fn(control->data);
		control->return_code = ret;
	}
	complete(&control->completion);
	mutex_lock(&ctx->call_control_lock);
	ctx->call_control = NULL;
	mutex_unlock(&ctx->call_control_lock);
}

/* Returns a negative error code if it's not activated but should return */
static int kdamond_wait_activation(struct damon_ctx *ctx)
{
	struct damos *s;
	unsigned long wait_time;
	unsigned long min_wait_time = 0;
	bool init_wait_time = false;

	while (!kdamond_need_stop(ctx)) {
		damon_for_each_scheme(s, ctx) {
			wait_time = damos_wmark_wait_us(s);
			if (!init_wait_time || wait_time < min_wait_time) {
				init_wait_time = true;
				min_wait_time = wait_time;
			}
		}
		if (!min_wait_time)
			return 0;

		kdamond_usleep(min_wait_time);

		if (ctx->callback.after_wmarks_check &&
				ctx->callback.after_wmarks_check(ctx))
			break;
		kdamond_call(ctx, true);
		damos_walk_cancel(ctx);
	}
	return -EBUSY;
}

static void kdamond_init_intervals_sis(struct damon_ctx *ctx)
{
	unsigned long sample_interval = ctx->attrs.sample_interval ?
		ctx->attrs.sample_interval : 1;
	unsigned long apply_interval;
	struct damos *scheme;

	ctx->passed_sample_intervals = 0;
	ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
	ctx->next_ops_update_sis = ctx->attrs.ops_update_interval /
		sample_interval;

	damon_for_each_scheme(scheme, ctx) {
		apply_interval = scheme->apply_interval_us ?
			scheme->apply_interval_us : ctx->attrs.aggr_interval;
		scheme->next_apply_sis = apply_interval / sample_interval;
	}
}
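/*
 * Illustrative example (hypothetical attribute values): with a sampling
 * interval of 5000 us and an aggregation interval of 100000 us,
 * kdamond_init_intervals_sis() sets next_aggregation_sis to
 * 100000 / 5000 = 20, so aggregation runs after every 20 sampling intervals.
 * A scheme whose apply_interval_us is zero inherits the aggregation interval
 * and therefore also gets next_apply_sis of 20.
 */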
/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;

	pr_debug("kdamond (%d) starts\n", current->pid);

	complete(&ctx->kdamond_started);
	kdamond_init_intervals_sis(ctx);

	if (ctx->ops.init)
		ctx->ops.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		goto done;
	ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1,
			sizeof(*ctx->regions_score_histogram), GFP_KERNEL);
	if (!ctx->regions_score_histogram)
		goto done;

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx)) {
		/*
		 * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could
		 * be changed from the after_wmarks_check() or
		 * after_aggregation() callbacks.  Read the values here, and
		 * use those for this iteration.  That is, new values set via
		 * damon_set_attrs() are respected from the next iteration.
		 */
		unsigned long next_aggregation_sis = ctx->next_aggregation_sis;
		unsigned long next_ops_update_sis = ctx->next_ops_update_sis;
		unsigned long sample_interval = ctx->attrs.sample_interval;

		if (kdamond_wait_activation(ctx))
			break;

		if (ctx->ops.prepare_access_checks)
			ctx->ops.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			break;
		kdamond_call(ctx, false);

		kdamond_usleep(sample_interval);
		ctx->passed_sample_intervals++;

		if (ctx->ops.check_accesses)
			max_nr_accesses = ctx->ops.check_accesses(ctx);

		if (ctx->passed_sample_intervals >= next_aggregation_sis) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				break;
		}

		/*
		 * do kdamond_apply_schemes() after kdamond_merge_regions() if
		 * possible, to reduce overhead
		 */
		if (!list_empty(&ctx->schemes))
			kdamond_apply_schemes(ctx);
		else
			damos_walk_cancel(ctx);

		sample_interval = ctx->attrs.sample_interval ?
			ctx->attrs.sample_interval : 1;
		if (ctx->passed_sample_intervals >= next_aggregation_sis) {
			ctx->next_aggregation_sis = next_aggregation_sis +
				ctx->attrs.aggr_interval / sample_interval;

			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->ops.reset_aggregated)
				ctx->ops.reset_aggregated(ctx);
		}

		if (ctx->passed_sample_intervals >= next_ops_update_sis) {
			ctx->next_ops_update_sis = next_ops_update_sis +
				ctx->attrs.ops_update_interval /
				sample_interval;
			if (ctx->ops.update)
				ctx->ops.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
done:
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate)
		ctx->callback.before_terminate(ctx);
	if (ctx->ops.cleanup)
		ctx->ops.cleanup(ctx);
	kfree(ctx->regions_score_histogram);

	pr_debug("kdamond (%d) finishes\n", current->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	kdamond_call(ctx, true);
	damos_walk_cancel(ctx);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	if (!nr_running_ctxs && running_exclusive_ctxs)
		running_exclusive_ctxs = false;
	mutex_unlock(&damon_lock);

	return 0;
}
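/*
 * Illustrative example (hypothetical intervals): with a 5000 us sampling
 * interval and a 100000 us aggregation interval, a region can be observed as
 * accessed at most 20 times per aggregation.  If check_accesses() reports
 * such a fully accessed region, kdamond_fn() above passes
 * max_nr_accesses / 10 = 2 as the initial merge threshold, and
 * kdamond_merge_regions() repeats merging with a doubled threshold until the
 * region count fits under max_nr_regions or the threshold already tried
 * reaches max_thres = 100000 / 5000 = 20.
 */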
/*
 * struct damon_system_ram_region - System RAM resource address region of
 *				    [@start, @end).
 * @start:	Start address of the region (inclusive).
 * @end:	End address of the region (exclusive).
 */
struct damon_system_ram_region {
	unsigned long start;
	unsigned long end;
};

static int walk_system_ram(struct resource *res, void *arg)
{
	struct damon_system_ram_region *a = arg;

	if (a->end - a->start < resource_size(res)) {
		a->start = res->start;
		a->end = res->end;
	}
	return 0;
}

/*
 * Find the biggest 'System RAM' resource and store its start and end address
 * in @start and @end, respectively.  If no System RAM is found, returns
 * false.
 */
static bool damon_find_biggest_system_ram(unsigned long *start,
		unsigned long *end)
{
	struct damon_system_ram_region arg = {};

	walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
	if (arg.end <= arg.start)
		return false;

	*start = arg.start;
	*end = arg.end;
	return true;
}

/**
 * damon_set_region_biggest_system_ram_default() - Set the region of the given
 *	monitoring target as requested, or biggest 'System RAM'.
 * @t:		The monitoring target to set the region.
 * @start:	The pointer to the start address of the region.
 * @end:	The pointer to the end address of the region.
 *
 * This function sets the region of @t as requested by @start and @end.  If the
 * values of @start and @end are zero, however, this function finds the biggest
 * 'System RAM' resource and sets the region to cover the resource.  In the
 * latter case, this function saves the start and end addresses of the resource
 * in @start and @end, respectively.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_region_biggest_system_ram_default(struct damon_target *t,
		unsigned long *start, unsigned long *end)
{
	struct damon_addr_range addr_range;

	if (*start > *end)
		return -EINVAL;

	if (!*start && !*end &&
	    !damon_find_biggest_system_ram(start, end))
		return -EINVAL;

	addr_range.start = *start;
	addr_range.end = *end;
	return damon_set_regions(t, &addr_range, 1);
}
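/*
 * Illustrative usage sketch (hypothetical caller, not part of the upstream
 * documentation): a physical address space setup can pass zeroed start/end so
 * that the biggest 'System RAM' resource is discovered and written back,
 * where 't' is an already constructed struct damon_target:
 *
 *	unsigned long start = 0, end = 0;
 *
 *	if (!damon_set_region_biggest_system_ram_default(t, &start, &end))
 *		pr_debug("monitoring [%lu, %lu)\n", start, end);
 */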
/*
 * damon_moving_sum() - Calculate an inferred moving sum value.
 * @mvsum:	Inferred sum of the last @len_window values.
 * @nomvsum:	Non-moving sum of the last discrete @len_window window values.
 * @len_window:	The number of last values to take care of.
 * @new_value:	New value that will be added to the pseudo moving sum.
 *
 * Moving sum (moving average * window size) is good for handling noise, but
 * the cost of keeping past values can be high for arbitrary window size.  This
 * function implements a lightweight pseudo moving sum function that doesn't
 * keep the past window values.
 *
 * It simply assumes there was no noise in the past, and derives the assumed
 * past value to drop from @nomvsum and @len_window.  @nomvsum is a non-moving
 * sum of the last window.  For example, if @len_window is 10 and we have 25
 * values, @nomvsum is the sum of the 11th to 20th values of the 25 values.
 * Hence, this function simply drops @nomvsum / @len_window from the given
 * @mvsum and adds @new_value.
 *
 * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values of
 * the last window could vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20.  For
 * calculating the next moving sum with a new value, we should drop 0 from 50
 * and add the new value.  However, this function assumes it got the value 5
 * for each of the last ten times.  Based on that assumption, when the next
 * value is measured, it drops the assumed past value, 5, from the current sum
 * and adds the new value to get the updated pseudo moving sum.
 *
 * This means the value could have errors, but the errors disappear at every
 * @len_window-aligned call.  For example, if @len_window is 10, the pseudo
 * moving sum with the 11th to 19th values would have an error, but the sum
 * with the 20th value will not have the error.
 *
 * Return: Pseudo moving sum after getting the @new_value.
 */
static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
		unsigned int len_window, unsigned int new_value)
{
	return mvsum - nomvsum / len_window + new_value;
}

/**
 * damon_update_region_access_rate() - Update the access rate of a region.
 * @r:		The DAMON region to update for its access check result.
 * @accessed:	Whether the region was accessed during the last sampling
 *		interval.
 * @attrs:	The damon_attrs of the DAMON context.
 *
 * Update the access rate of a region with the region's last sampling interval
 * access check result.
 *
 * Usually this will be called by &damon_operations->check_accesses callback.
 */
void damon_update_region_access_rate(struct damon_region *r, bool accessed,
		struct damon_attrs *attrs)
{
	unsigned int len_window = 1;

	/*
	 * sample_interval can be zero, but cannot be larger than
	 * aggr_interval, owing to validation of damon_set_attrs().
	 */
	if (attrs->sample_interval)
		len_window = damon_max_nr_accesses(attrs);
	r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp,
			r->last_nr_accesses * 10000, len_window,
			accessed ? 10000 : 0);

	if (accessed)
		r->nr_accesses++;
}

static int __init damon_init(void)
{
	damon_region_cache = KMEM_CACHE(damon_region, 0);
	if (unlikely(!damon_region_cache)) {
		pr_err("creating damon_region_cache fails\n");
		return -ENOMEM;
	}

	return 0;
}

subsys_initcall(damon_init);

#include "tests/core-kunit.h"