// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
static bool running_exclusive_ctxs;

static DEFINE_MUTEX(damon_ops_lock);
static struct damon_operations damon_registered_ops[NR_DAMON_OPS];

static struct kmem_cache *damon_region_cache __ro_after_init;

/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
static bool __damon_is_registered_ops(enum damon_ops_id id)
{
	struct damon_operations empty_ops = {};

	if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
		return false;
	return true;
}

/**
 * damon_is_registered_ops() - Check if a given damon_operations is registered.
 * @id:	Id of the damon_operations to check if registered.
 *
 * Return: true if the ops is set, false otherwise.
 */
bool damon_is_registered_ops(enum damon_ops_id id)
{
	bool registered;

	if (id >= NR_DAMON_OPS)
		return false;
	mutex_lock(&damon_ops_lock);
	registered = __damon_is_registered_ops(id);
	mutex_unlock(&damon_ops_lock);
	return registered;
}

/**
 * damon_register_ops() - Register a monitoring operations set to DAMON.
 * @ops:	monitoring operations set to register.
 *
 * This function registers a monitoring operations set having a valid &struct
 * damon_operations->id so that others can find and use it later.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_register_ops(struct damon_operations *ops)
{
	int err = 0;

	if (ops->id >= NR_DAMON_OPS)
		return -EINVAL;
	mutex_lock(&damon_ops_lock);
	/* Fail for already registered ops */
	if (__damon_is_registered_ops(ops->id)) {
		err = -EINVAL;
		goto out;
	}
	damon_registered_ops[ops->id] = *ops;
out:
	mutex_unlock(&damon_ops_lock);
	return err;
}
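/*
 * Example (illustrative sketch only): a monitoring operations provider would
 * typically register its operations set from an initcall.  The callback
 * implementations named below (my_ops_init() and friends) and the choice of
 * DAMON_OPS_VADDR are hypothetical placeholders for this example, not part
 * of DAMON itself.
 *
 *	static struct damon_operations my_ops = {
 *		.id = DAMON_OPS_VADDR,
 *		.init = my_ops_init,
 *		.prepare_access_checks = my_prepare_access_checks,
 *		.check_accesses = my_check_accesses,
 *	};
 *
 *	static int __init my_ops_register(void)
 *	{
 *		return damon_register_ops(&my_ops);
 *	}
 */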
/**
 * damon_select_ops() - Select a monitoring operations set to use with the
 * context.
 * @ctx:	monitoring context to use the operations.
 * @id:		id of the registered monitoring operations to select.
 *
 * This function finds the registered monitoring operations set of @id and
 * makes @ctx use it.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
{
	int err = 0;

	if (id >= NR_DAMON_OPS)
		return -EINVAL;

	mutex_lock(&damon_ops_lock);
	if (!__damon_is_registered_ops(id))
		err = -EINVAL;
	else
		ctx->ops = damon_registered_ops[id];
	mutex_unlock(&damon_ops_lock);
	return err;
}

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmem_cache_alloc(damon_region_cache, GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	region->nr_accesses_bp = 0;
	INIT_LIST_HEAD(&region->list);

	region->age = 0;
	region->last_nr_accesses = 0;

	return region;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kmem_cache_free(damon_region_cache, r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}

/*
 * Check whether a region is intersecting an address range
 *
 * Returns true if it is.
 */
static bool damon_intersect(struct damon_region *r,
		struct damon_addr_range *re)
{
	return !(r->ar.end <= re->start || re->end <= r->ar.start);
}

/*
 * Fill holes in regions with new regions.
 */
static int damon_fill_regions_holes(struct damon_region *first,
		struct damon_region *last, struct damon_target *t)
{
	struct damon_region *r = first;

	damon_for_each_region_from(r, t) {
		struct damon_region *next, *newr;

		if (r == last)
			break;
		next = damon_next_region(r);
		if (r->ar.end != next->ar.start) {
			newr = damon_new_region(r->ar.end, next->ar.start);
			if (!newr)
				return -ENOMEM;
			damon_insert_region(newr, r, next, t);
		}
	}
	return 0;
}

/*
 * damon_set_regions() - Set regions of a target for given address ranges.
 * @t:		the given target.
 * @ranges:	array of new monitoring target ranges.
 * @nr_ranges:	length of @ranges.
 *
 * This function adds new regions to, or modifies existing regions of, a
 * monitoring target to fit in specific ranges.
 *
 * Return: 0 if success, or negative error code otherwise.
 */
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
		unsigned int nr_ranges)
{
	struct damon_region *r, *next;
	unsigned int i;
	int err;

	/* Remove regions which are not in the new ranges */
	damon_for_each_region_safe(r, next, t) {
		for (i = 0; i < nr_ranges; i++) {
			if (damon_intersect(r, &ranges[i]))
				break;
		}
		if (i == nr_ranges)
			damon_destroy_region(r, t);
	}

	r = damon_first_region(t);
	/* Add new regions or resize existing regions to fit in the ranges */
	for (i = 0; i < nr_ranges; i++) {
		struct damon_region *first = NULL, *last, *newr;
		struct damon_addr_range *range;

		range = &ranges[i];
		/* Get the first/last regions intersecting with the range */
		damon_for_each_region_from(r, t) {
			if (damon_intersect(r, range)) {
				if (!first)
					first = r;
				last = r;
			}
			if (r->ar.start >= range->end)
				break;
		}
		if (!first) {
			/* no region intersects with this range */
			newr = damon_new_region(
					ALIGN_DOWN(range->start,
						DAMON_MIN_REGION),
					ALIGN(range->end, DAMON_MIN_REGION));
			if (!newr)
				return -ENOMEM;
			damon_insert_region(newr, damon_prev_region(r), r, t);
		} else {
			/* resize intersecting regions to fit in this range */
			first->ar.start = ALIGN_DOWN(range->start,
					DAMON_MIN_REGION);
			last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);

			/* fill possible holes in the range */
			err = damon_fill_regions_holes(first, last, t);
			if (err)
				return err;
		}
	}
	return 0;
}
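/*
 * Example (illustrative sketch only): making a target monitor two address
 * ranges.  The addresses below are arbitrary values picked for the example.
 *
 *	struct damon_addr_range ranges[] = {
 *		{ .start = 0x100000000, .end = 0x180000000 },
 *		{ .start = 0x200000000, .end = 0x240000000 },
 *	};
 *
 *	int err = damon_set_regions(t, ranges, ARRAY_SIZE(ranges));
 */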
struct damos_filter *damos_new_filter(enum damos_filter_type type,
		bool matching)
{
	struct damos_filter *filter;

	filter = kmalloc(sizeof(*filter), GFP_KERNEL);
	if (!filter)
		return NULL;
	filter->type = type;
	filter->matching = matching;
	INIT_LIST_HEAD(&filter->list);
	return filter;
}

void damos_add_filter(struct damos *s, struct damos_filter *f)
{
	list_add_tail(&f->list, &s->filters);
}

static void damos_del_filter(struct damos_filter *f)
{
	list_del(&f->list);
}

static void damos_free_filter(struct damos_filter *f)
{
	kfree(f);
}

void damos_destroy_filter(struct damos_filter *f)
{
	damos_del_filter(f);
	damos_free_filter(f);
}

struct damos_quota_goal *damos_new_quota_goal(
		enum damos_quota_goal_metric metric,
		unsigned long target_value)
{
	struct damos_quota_goal *goal;

	goal = kmalloc(sizeof(*goal), GFP_KERNEL);
	if (!goal)
		return NULL;
	goal->metric = metric;
	goal->target_value = target_value;
	INIT_LIST_HEAD(&goal->list);
	return goal;
}

void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g)
{
	list_add_tail(&g->list, &q->goals);
}

static void damos_del_quota_goal(struct damos_quota_goal *g)
{
	list_del(&g->list);
}

static void damos_free_quota_goal(struct damos_quota_goal *g)
{
	kfree(g);
}

void damos_destroy_quota_goal(struct damos_quota_goal *g)
{
	damos_del_quota_goal(g);
	damos_free_quota_goal(g);
}
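/*
 * Example (illustrative sketch only): attaching a PSI-based quota goal and a
 * filter to an already constructed scheme.  The target value and the use of
 * DAMOS_FILTER_TYPE_ANON are assumptions made for the example.
 *
 *	struct damos_quota_goal *goal;
 *	struct damos_filter *filter;
 *
 *	goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US, 1000);
 *	if (goal)
 *		damos_add_quota_goal(&scheme->quota, goal);
 *
 *	filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true);
 *	if (filter)
 *		damos_add_filter(scheme, filter);
 */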
/* initialize fields of @quota that normally API users wouldn't set */
static struct damos_quota *damos_quota_init(struct damos_quota *quota)
{
	quota->esz = 0;
	quota->total_charged_sz = 0;
	quota->total_charged_ns = 0;
	quota->charged_sz = 0;
	quota->charged_from = 0;
	quota->charge_target_from = NULL;
	quota->charge_addr_from = 0;
	quota->esz_bp = 0;
	return quota;
}

struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
			enum damos_action action,
			unsigned long apply_interval_us,
			struct damos_quota *quota,
			struct damos_watermarks *wmarks)
{
	struct damos *scheme;

	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
	if (!scheme)
		return NULL;
	scheme->pattern = *pattern;
	scheme->action = action;
	scheme->apply_interval_us = apply_interval_us;
	/*
	 * next_apply_sis will be set when kdamond starts.  While kdamond is
	 * running, it will also be updated when it is added to the DAMON
	 * context, or when damon_attrs are updated.
	 */
	scheme->next_apply_sis = 0;
	INIT_LIST_HEAD(&scheme->filters);
	scheme->stat = (struct damos_stat){};
	INIT_LIST_HEAD(&scheme->list);

	scheme->quota = *(damos_quota_init(quota));
	/* quota.goals should be separately set by caller */
	INIT_LIST_HEAD(&scheme->quota.goals);

	scheme->wmarks = *wmarks;
	scheme->wmarks.activated = true;

	return scheme;
}

static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx)
{
	unsigned long sample_interval = ctx->attrs.sample_interval ?
		ctx->attrs.sample_interval : 1;
	unsigned long apply_interval = s->apply_interval_us ?
		s->apply_interval_us : ctx->attrs.aggr_interval;

	s->next_apply_sis = ctx->passed_sample_intervals +
		apply_interval / sample_interval;
}

void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
	list_add_tail(&s->list, &ctx->schemes);
	damos_set_next_apply_sis(s, ctx);
}

static void damon_del_scheme(struct damos *s)
{
	list_del(&s->list);
}

static void damon_free_scheme(struct damos *s)
{
	kfree(s);
}

void damon_destroy_scheme(struct damos *s)
{
	struct damos_quota_goal *g, *g_next;
	struct damos_filter *f, *next;

	damos_for_each_quota_goal_safe(g, g_next, &s->quota)
		damos_destroy_quota_goal(g);

	damos_for_each_filter_safe(f, next, s)
		damos_destroy_filter(f);
	damon_del_scheme(s);
	damon_free_scheme(s);
}

/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_target *damon_new_target(void)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->pid = NULL;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);
	INIT_LIST_HEAD(&t->list);

	return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

bool damon_targets_empty(struct damon_ctx *ctx)
{
	return list_empty(&ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}

struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	init_completion(&ctx->kdamond_started);

	ctx->attrs.sample_interval = 5 * 1000;
	ctx->attrs.aggr_interval = 100 * 1000;
	ctx->attrs.ops_update_interval = 60 * 1000 * 1000;

	ctx->passed_sample_intervals = 0;
	/* These will be set from kdamond_init_intervals_sis() */
	ctx->next_aggregation_sis = 0;
	ctx->next_ops_update_sis = 0;

	mutex_init(&ctx->kdamond_lock);

	ctx->attrs.min_nr_regions = 10;
	ctx->attrs.max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);
	INIT_LIST_HEAD(&ctx->schemes);

	return ctx;
}
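/*
 * Example (illustrative sketch only): building a minimal monitoring context
 * with one target.  Error handling is abbreviated, and the choice of
 * DAMON_OPS_PADDR is an assumption made for the example.
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *	struct damon_target *t = damon_new_target();
 *
 *	if (!ctx || !t)
 *		goto fail;
 *	damon_add_target(ctx, t);
 *	if (damon_select_ops(ctx, DAMON_OPS_PADDR))
 *		goto fail;
 */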
static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->ops.cleanup) {
		ctx->ops.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	struct damos *s, *next_s;

	damon_destroy_targets(ctx);

	damon_for_each_scheme_safe(s, next_s, ctx)
		damon_destroy_scheme(s);

	kfree(ctx);
}

static unsigned int damon_age_for_new_attrs(unsigned int age,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
{
	return age * old_attrs->aggr_interval / new_attrs->aggr_interval;
}

/* convert access ratio in bp (per 10,000) to nr_accesses */
static unsigned int damon_accesses_bp_to_nr_accesses(
		unsigned int accesses_bp, struct damon_attrs *attrs)
{
	return accesses_bp * damon_max_nr_accesses(attrs) / 10000;
}

/* convert nr_accesses to access ratio in bp (per 10,000) */
static unsigned int damon_nr_accesses_to_accesses_bp(
		unsigned int nr_accesses, struct damon_attrs *attrs)
{
	return nr_accesses * 10000 / damon_max_nr_accesses(attrs);
}

static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
{
	return damon_accesses_bp_to_nr_accesses(
			damon_nr_accesses_to_accesses_bp(
				nr_accesses, old_attrs),
			new_attrs);
}

static void damon_update_monitoring_result(struct damon_region *r,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
{
	r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses,
			old_attrs, new_attrs);
	r->nr_accesses_bp = r->nr_accesses * 10000;
	r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs);
}

/*
 * region->nr_accesses is the number of sampling intervals in the last
 * aggregation interval in which access to the region has been found, and
 * region->age is the number of aggregation intervals that its access pattern
 * has maintained.  For this reason, the real meaning of the two fields
 * depends on the current sampling interval and aggregation interval.  This
 * function updates ->nr_accesses and ->age of the given damon_ctx's regions
 * for new damon_attrs.
 */
static void damon_update_monitoring_results(struct damon_ctx *ctx,
		struct damon_attrs *new_attrs)
{
	struct damon_attrs *old_attrs = &ctx->attrs;
	struct damon_target *t;
	struct damon_region *r;

	/* if any interval is zero, simply skip the conversion */
	if (!old_attrs->sample_interval || !old_attrs->aggr_interval ||
			!new_attrs->sample_interval ||
			!new_attrs->aggr_interval)
		return;

	damon_for_each_target(t, ctx)
		damon_for_each_region(r, t)
			damon_update_monitoring_result(
					r, old_attrs, new_attrs);
}

/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:	monitoring context
 * @attrs:	monitoring attributes
 *
 * This function should be called while the kdamond is not running, or while
 * an access check results aggregation is not ongoing (e.g., from
 * &struct damon_callback->after_aggregation or
 * &struct damon_callback->after_wmarks_check callbacks).
 *
 * Every time interval is in micro-seconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
{
	unsigned long sample_interval = attrs->sample_interval ?
		attrs->sample_interval : 1;
	struct damos *s;

	if (attrs->min_nr_regions < 3)
		return -EINVAL;
	if (attrs->min_nr_regions > attrs->max_nr_regions)
		return -EINVAL;
	if (attrs->sample_interval > attrs->aggr_interval)
		return -EINVAL;

	ctx->next_aggregation_sis = ctx->passed_sample_intervals +
		attrs->aggr_interval / sample_interval;
	ctx->next_ops_update_sis = ctx->passed_sample_intervals +
		attrs->ops_update_interval / sample_interval;

	damon_update_monitoring_results(ctx, attrs);
	ctx->attrs = *attrs;

	damon_for_each_scheme(s, ctx)
		damos_set_next_apply_sis(s, ctx);

	return 0;
}
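/*
 * Example (illustrative sketch only): switching a context to a 10 ms sampling
 * interval and a 200 ms aggregation interval.  The interval values are
 * arbitrary, and the other fields keep their current values.
 *
 *	struct damon_attrs attrs = ctx->attrs;
 *
 *	attrs.sample_interval = 10 * 1000;	// 10 ms, in microseconds
 *	attrs.aggr_interval = 200 * 1000;	// 200 ms
 *	if (damon_set_attrs(ctx, &attrs))
 *		pr_warn("invalid monitoring attributes\n");
 */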
/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx:	monitoring context
 * @schemes:	array of the schemes
 * @nr_schemes:	number of entries in @schemes
 *
 * This function should not be called while the kdamond of the context is
 * running.
 */
void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
			ssize_t nr_schemes)
{
	struct damos *s, *next;
	ssize_t i;

	damon_for_each_scheme_safe(s, next, ctx)
		damon_destroy_scheme(s);
	for (i = 0; i < nr_schemes; i++)
		damon_add_scheme(ctx, schemes[i]);
}

/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += damon_sz_region(r);
	}

	if (ctx->attrs.min_nr_regions)
		sz /= ctx->attrs.min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		reinit_completion(&ctx->kdamond_started);
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		} else {
			wait_for_completion(&ctx->kdamond_started);
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}

/**
 * damon_start() - Starts monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 * @exclusive:	exclusiveness of this contexts group
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per context is created and run in parallel.  The
 * caller should handle synchronization between the threads by itself.  If
 * @exclusive is true and a group of threads that was created by another
 * 'damon_start()' call is currently running, this function does nothing but
 * returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if ((exclusive && nr_running_ctxs) ||
			(!exclusive && running_exclusive_ctxs)) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	if (exclusive && nr_running_ctxs)
		running_exclusive_ctxs = true;
	mutex_unlock(&damon_lock);

	return err;
}

/*
 * __damon_stop() - Stops monitoring of a given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	struct task_struct *tsk;

	mutex_lock(&ctx->kdamond_lock);
	tsk = ctx->kdamond;
	if (tsk) {
		get_task_struct(tsk);
		mutex_unlock(&ctx->kdamond_lock);
		kthread_stop_put(tsk);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}

/**
 * damon_stop() - Stops monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			break;
	}
	return err;
}
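/*
 * Example (illustrative sketch only): running a single context exclusively
 * and stopping it later.  'ctx' is assumed to have been fully set up (ops,
 * targets, and attributes) beforehand.
 *
 *	struct damon_ctx *ctxs[] = { ctx };
 *
 *	if (!damon_start(ctxs, 1, true)) {
 *		// ... monitoring is running ...
 *		damon_stop(ctxs, 1);
 *	}
 */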
/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;
	unsigned int ti = 0;	/* target's index */

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(ti, r, damon_nr_regions(t));
			r->last_nr_accesses = r->nr_accesses;
			r->nr_accesses = 0;
		}
		ti++;
	}
}

static void damon_split_region_at(struct damon_target *t,
				  struct damon_region *r, unsigned long sz_r);

static bool __damos_valid_target(struct damon_region *r, struct damos *s)
{
	unsigned long sz;
	unsigned int nr_accesses = r->nr_accesses_bp / 10000;

	sz = damon_sz_region(r);
	return s->pattern.min_sz_region <= sz &&
		sz <= s->pattern.max_sz_region &&
		s->pattern.min_nr_accesses <= nr_accesses &&
		nr_accesses <= s->pattern.max_nr_accesses &&
		s->pattern.min_age_region <= r->age &&
		r->age <= s->pattern.max_age_region;
}

static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	bool ret = __damos_valid_target(r, s);

	if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
		return ret;

	return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}

/*
 * damos_skip_charged_region() - Check if the given region or starting part of
 * it is already charged for the DAMOS quota.
 * @t:	The target of the region.
 * @rp:	The pointer to the region.
 * @s:	The scheme to be applied.
 *
 * If a quota of a scheme has been exceeded in a quota charge window, the
 * scheme's action would be applied to only a part of the target access
 * pattern fulfilling regions.  To avoid applying the scheme action to only
 * already applied regions, DAMON skips applying the scheme action to the
 * regions that were charged in the previous charge window.
 *
 * This function checks if a given region should be skipped or not for this
 * reason.  If only the starting part of the region has previously been
 * charged, this function splits the region into two so that the second one
 * covers the area that was not charged in the previous charge window, saves
 * the second region in *rp, and returns false, so that the caller can apply
 * the DAMON action to the second one.
 *
 * Return: true if the region should be entirely skipped, false otherwise.
 */
static bool damos_skip_charged_region(struct damon_target *t,
		struct damon_region **rp, struct damos *s)
{
	struct damon_region *r = *rp;
	struct damos_quota *quota = &s->quota;
	unsigned long sz_to_skip;

	/* Skip previously charged regions */
	if (quota->charge_target_from) {
		if (t != quota->charge_target_from)
			return true;
		if (r == damon_last_region(t)) {
			quota->charge_target_from = NULL;
			quota->charge_addr_from = 0;
			return true;
		}
		if (quota->charge_addr_from &&
				r->ar.end <= quota->charge_addr_from)
			return true;

		if (quota->charge_addr_from && r->ar.start <
				quota->charge_addr_from) {
			sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
					r->ar.start, DAMON_MIN_REGION);
			if (!sz_to_skip) {
				if (damon_sz_region(r) <= DAMON_MIN_REGION)
					return true;
				sz_to_skip = DAMON_MIN_REGION;
			}
			damon_split_region_at(t, r, sz_to_skip);
			r = damon_next_region(r);
			*rp = r;
		}
		quota->charge_target_from = NULL;
		quota->charge_addr_from = 0;
	}
	return false;
}

static void damos_update_stat(struct damos *s,
		unsigned long sz_tried, unsigned long sz_applied)
{
	s->stat.nr_tried++;
	s->stat.sz_tried += sz_tried;
	if (sz_applied)
		s->stat.nr_applied++;
	s->stat.sz_applied += sz_applied;
}

static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
		struct damon_region *r, struct damos_filter *filter)
{
	bool matched = false;
	struct damon_target *ti;
	int target_idx = 0;
	unsigned long start, end;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_TARGET:
		damon_for_each_target(ti, ctx) {
			if (ti == t)
				break;
			target_idx++;
		}
		matched = target_idx == filter->target_idx;
		break;
	case DAMOS_FILTER_TYPE_ADDR:
		start = ALIGN_DOWN(filter->addr_range.start, DAMON_MIN_REGION);
		end = ALIGN_DOWN(filter->addr_range.end, DAMON_MIN_REGION);

		/* inside the range */
		if (start <= r->ar.start && r->ar.end <= end) {
			matched = true;
			break;
		}
		/* outside of the range */
		if (r->ar.end <= start || end <= r->ar.start) {
			matched = false;
			break;
		}
		/* start before the range and overlap */
		if (r->ar.start < start) {
			damon_split_region_at(t, r, start - r->ar.start);
			matched = false;
			break;
		}
		/* start inside the range */
		damon_split_region_at(t, r, end - r->ar.start);
		matched = true;
		break;
	default:
		return false;
	}

	return matched == filter->matching;
}

static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	struct damos_filter *filter;

	damos_for_each_filter(filter, s) {
		if (__damos_filter_out(ctx, t, r, filter))
			return true;
	}
	return false;
}

static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	struct damos_quota *quota = &s->quota;
	unsigned long sz = damon_sz_region(r);
	struct timespec64 begin, end;
	unsigned long sz_applied = 0;
	int err = 0;
	/*
	 * We plan to support multiple contexts per kdamond, as DAMON sysfs
	 * implies with its 'nr_contexts' file.  Nevertheless, only a single
	 * context per kdamond is supported for now.  So, we can simply use
	 * '0' as the context index here.
	 */
	unsigned int cidx = 0;
	struct damos *siter;		/* schemes iterator */
	unsigned int sidx = 0;
	struct damon_target *titer;	/* targets iterator */
	unsigned int tidx = 0;
	bool do_trace = false;

	/* get indices for trace_damos_before_apply() */
	if (trace_damos_before_apply_enabled()) {
		damon_for_each_scheme(siter, c) {
			if (siter == s)
				break;
			sidx++;
		}
		damon_for_each_target(titer, c) {
			if (titer == t)
				break;
			tidx++;
		}
		do_trace = true;
	}

	if (c->ops.apply_scheme) {
		if (quota->esz && quota->charged_sz + sz > quota->esz) {
			sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
					DAMON_MIN_REGION);
			if (!sz)
				goto update_stat;
			damon_split_region_at(t, r, sz);
		}
		if (damos_filter_out(c, t, r, s))
			return;
		ktime_get_coarse_ts64(&begin);
		if (c->callback.before_damos_apply)
			err = c->callback.before_damos_apply(c, t, r, s);
		if (!err) {
			trace_damos_before_apply(cidx, sidx, tidx, r,
					damon_nr_regions(t), do_trace);
			sz_applied = c->ops.apply_scheme(c, t, r, s);
		}
		ktime_get_coarse_ts64(&end);
		quota->total_charged_ns += timespec64_to_ns(&end) -
			timespec64_to_ns(&begin);
		quota->charged_sz += sz;
		if (quota->esz && quota->charged_sz >= quota->esz) {
			quota->charge_target_from = t;
			quota->charge_addr_from = r->ar.end + 1;
		}
	}
	if (s->action != DAMOS_STAT)
		r->age = 0;

update_stat:
	damos_update_stat(s, sz, sz_applied);
}

static void damon_do_apply_schemes(struct damon_ctx *c,
				   struct damon_target *t,
				   struct damon_region *r)
{
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;

		if (c->passed_sample_intervals != s->next_apply_sis)
			continue;

		if (!s->wmarks.activated)
			continue;

		/* Check the quota */
		if (quota->esz && quota->charged_sz >= quota->esz)
			continue;

		if (damos_skip_charged_region(t, &r, s))
			continue;

		if (!damos_valid_target(c, t, r, s))
			continue;

		damos_apply_scheme(c, t, r, s);
	}
}

/*
 * damon_feed_loop_next_input() - get next input to achieve a target score.
 * @last_input:	The last input.
 * @score:	Current score that was made with @last_input.
 *
 * Calculate the next input to achieve the target score, based on the last
 * input and the current score.  Assuming the input and the score are
 * positively proportional, calculate how much compensation should be added
 * to or subtracted from the last input, as a proportion of the last input.
 * Avoid the next input always becoming zero by keeping it non-zero.  In
 * short form (assuming support of float and signed calculations), the
 * algorithm is as below.
 *
 * next_input = max(last_input * ((goal - current) / goal + 1), 1)
 *
 * For simple implementation, we assume the target score is always 10,000.
 * The caller should adjust @score for this.
 *
 * Returns the next input that is assumed to achieve the target score.
 */
static unsigned long damon_feed_loop_next_input(unsigned long last_input,
		unsigned long score)
{
	const unsigned long goal = 10000;
	unsigned long score_goal_diff = max(goal, score) - min(goal, score);
	unsigned long score_goal_diff_bp = score_goal_diff * 10000 / goal;
	unsigned long compensation = last_input * score_goal_diff_bp / 10000;
	/* Set minimum input as 10000 to avoid the compensation being zero */
	const unsigned long min_input = 10000;

	if (goal > score)
		return last_input + compensation;
	if (last_input > compensation + min_input)
		return last_input - compensation;
	return min_input;
}
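/*
 * Worked example (illustrative only): with last_input == 1000 and
 * score == 7500 (25% below the 10,000 target), score_goal_diff_bp is 2500,
 * so the compensation is 1000 * 2500 / 10000 == 250 and the next input
 * becomes 1250.  With score == 12500 instead, the 250 would be subtracted
 * only if the result stayed above min_input (10000); since it would not
 * here, min_input is returned.
 */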
#ifdef CONFIG_PSI

static u64 damos_get_some_mem_psi_total(void)
{
	if (static_branch_likely(&psi_disabled))
		return 0;
	return div_u64(psi_system.total[PSI_AVGS][PSI_MEM * 2],
			NSEC_PER_USEC);
}

#else	/* CONFIG_PSI */

static inline u64 damos_get_some_mem_psi_total(void)
{
	return 0;
}

#endif	/* CONFIG_PSI */

static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal)
{
	u64 now_psi_total;

	switch (goal->metric) {
	case DAMOS_QUOTA_USER_INPUT:
		/* User should already set goal->current_value */
		break;
	case DAMOS_QUOTA_SOME_MEM_PSI_US:
		now_psi_total = damos_get_some_mem_psi_total();
		goal->current_value = now_psi_total - goal->last_psi_total;
		goal->last_psi_total = now_psi_total;
		break;
	default:
		break;
	}
}

/* Return the highest score since it makes schemes least aggressive */
static unsigned long damos_quota_score(struct damos_quota *quota)
{
	struct damos_quota_goal *goal;
	unsigned long highest_score = 0;

	damos_for_each_quota_goal(goal, quota) {
		damos_set_quota_goal_current_value(goal);
		highest_score = max(highest_score,
				goal->current_value * 10000 /
				goal->target_value);
	}

	return highest_score;
}

/*
 * Called only if quota->ms or quota->sz are set, or quota->goals is not empty
 */
static void damos_set_effective_quota(struct damos_quota *quota)
{
	unsigned long throughput;
	unsigned long esz;

	if (!quota->ms && list_empty(&quota->goals)) {
		quota->esz = quota->sz;
		return;
	}

	if (!list_empty(&quota->goals)) {
		unsigned long score = damos_quota_score(quota);

		quota->esz_bp = damon_feed_loop_next_input(
				max(quota->esz_bp, 10000UL),
				score);
		esz = quota->esz_bp / 10000;
	}

	if (quota->ms) {
		if (quota->total_charged_ns)
			throughput = quota->total_charged_sz * 1000000 /
				quota->total_charged_ns;
		else
			throughput = PAGE_SIZE * 1024;
		if (!list_empty(&quota->goals))
			esz = min(throughput * quota->ms, esz);
		else
			esz = throughput * quota->ms;
	}

	if (quota->sz && quota->sz < esz)
		esz = quota->sz;

	quota->esz = esz;
}

static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
{
	struct damos_quota *quota = &s->quota;
	struct damon_target *t;
	struct damon_region *r;
	unsigned long cumulated_sz;
	unsigned int score, max_score = 0;

	if (!quota->ms && !quota->sz && list_empty(&quota->goals))
		return;

	/* New charge window starts */
	if (time_after_eq(jiffies, quota->charged_from +
				msecs_to_jiffies(quota->reset_interval))) {
		if (quota->esz && quota->charged_sz >= quota->esz)
			s->stat.qt_exceeds++;
		quota->total_charged_sz += quota->charged_sz;
		quota->charged_from = jiffies;
		quota->charged_sz = 0;
		damos_set_effective_quota(quota);
	}

	if (!c->ops.get_scheme_score)
		return;

	/* Fill up the score histogram */
	memset(quota->histogram, 0, sizeof(quota->histogram));
	damon_for_each_target(t, c) {
		damon_for_each_region(r, t) {
			if (!__damos_valid_target(r, s))
				continue;
			score = c->ops.get_scheme_score(c, t, r, s);
			quota->histogram[score] += damon_sz_region(r);
			if (score > max_score)
				max_score = score;
		}
	}

	/* Set the min score limit */
	for (cumulated_sz = 0, score = max_score; ; score--) {
		cumulated_sz += quota->histogram[score];
		if (cumulated_sz >= quota->esz || !score)
			break;
	}
	quota->min_score = score;
}

static void kdamond_apply_schemes(struct damon_ctx *c)
{
	struct damon_target *t;
	struct damon_region *r, *next_r;
	struct damos *s;
	unsigned long sample_interval = c->attrs.sample_interval ?
		c->attrs.sample_interval : 1;
	bool has_schemes_to_apply = false;

	damon_for_each_scheme(s, c) {
		if (c->passed_sample_intervals != s->next_apply_sis)
			continue;

		if (!s->wmarks.activated)
			continue;

		has_schemes_to_apply = true;

		damos_adjust_quota(c, s);
	}

	if (!has_schemes_to_apply)
		return;

	damon_for_each_target(t, c) {
		damon_for_each_region_safe(r, next_r, t)
			damon_do_apply_schemes(c, t, r);
	}

	damon_for_each_scheme(s, c) {
		if (c->passed_sample_intervals != s->next_apply_sis)
			continue;
		s->next_apply_sis += (s->apply_interval_us ?
				s->apply_interval_us : c->attrs.aggr_interval) /
				sample_interval;
	}
}

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = damon_sz_region(l), sz_r = damon_sz_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->nr_accesses_bp = l->nr_accesses * 10000;
	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
			r->age = 0;
		else
			r->age++;

		if (prev && prev->ar.end == r->ar.start &&
		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
		    damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and whose
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under a dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}

/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_target *t,
				  struct damon_region *r, unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	new->age = r->age;
	new->last_nr_accesses = r->last_nr_accesses;
	new->nr_accesses_bp = r->nr_accesses_bp;
	new->nr_accesses = r->nr_accesses;

	damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = damon_sz_region(r);

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select the size of the left sub-region to
			 * be at least 10% and at most 90% of the original
			 * region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of the regions is equal to or smaller than half of
 * the user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->attrs.max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has a different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->attrs.max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(t, nr_subregions);

	last_nr_regions = nr_regions;
}

/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if the monitoring needs to stop.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;

	if (kthread_should_stop())
		return true;

	if (!ctx->ops.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->ops.target_valid(t))
			return false;
	}

	return true;
}

static int damos_get_wmark_metric_value(enum damos_wmark_metric metric,
					unsigned long *metric_value)
{
	switch (metric) {
	case DAMOS_WMARK_FREE_MEM_RATE:
		*metric_value = global_zone_page_state(NR_FREE_PAGES) * 1000 /
			totalram_pages();
		return 0;
	default:
		break;
	}
	return -EINVAL;
}

/*
 * Returns zero if the scheme is active.  Else, returns time to wait for next
 * watermark check in micro-seconds.
 */
static unsigned long damos_wmark_wait_us(struct damos *scheme)
{
	unsigned long metric;

	if (damos_get_wmark_metric_value(scheme->wmarks.metric, &metric))
		return 0;

	/* higher than high watermark or lower than low watermark */
	if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
		if (scheme->wmarks.activated)
			pr_debug("deactivate a scheme (%d) for %s wmark\n",
					scheme->action,
					metric > scheme->wmarks.high ?
					"high" : "low");
		scheme->wmarks.activated = false;
		return scheme->wmarks.interval;
	}

	/* inactive and higher than middle watermark */
	if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
			!scheme->wmarks.activated)
		return scheme->wmarks.interval;

	if (!scheme->wmarks.activated)
		pr_debug("activate a scheme (%d)\n", scheme->action);
	scheme->wmarks.activated = true;
	return 0;
}

static void kdamond_usleep(unsigned long usecs)
{
	/* See Documentation/timers/timers-howto.rst for the thresholds */
	if (usecs > 20 * USEC_PER_MSEC)
		schedule_timeout_idle(usecs_to_jiffies(usecs));
	else
		usleep_idle_range(usecs, usecs + 1);
}

/* Returns negative error code if it's not activated but should return */
static int kdamond_wait_activation(struct damon_ctx *ctx)
{
	struct damos *s;
	unsigned long wait_time;
	unsigned long min_wait_time = 0;
	bool init_wait_time = false;

	while (!kdamond_need_stop(ctx)) {
		damon_for_each_scheme(s, ctx) {
			wait_time = damos_wmark_wait_us(s);
			if (!init_wait_time || wait_time < min_wait_time) {
				init_wait_time = true;
				min_wait_time = wait_time;
			}
		}
		if (!min_wait_time)
			return 0;

		kdamond_usleep(min_wait_time);

		if (ctx->callback.after_wmarks_check &&
				ctx->callback.after_wmarks_check(ctx))
			break;
	}
	return -EBUSY;
}

static void kdamond_init_intervals_sis(struct damon_ctx *ctx)
{
	unsigned long sample_interval = ctx->attrs.sample_interval ?
		ctx->attrs.sample_interval : 1;
	unsigned long apply_interval;
	struct damos *scheme;

	ctx->passed_sample_intervals = 0;
	ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
	ctx->next_ops_update_sis = ctx->attrs.ops_update_interval /
		sample_interval;

	damon_for_each_scheme(scheme, ctx) {
		apply_interval = scheme->apply_interval_us ?
			scheme->apply_interval_us : ctx->attrs.aggr_interval;
		scheme->next_apply_sis = apply_interval / sample_interval;
	}
}

/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;

	pr_debug("kdamond (%d) starts\n", current->pid);

	complete(&ctx->kdamond_started);
	kdamond_init_intervals_sis(ctx);

	if (ctx->ops.init)
		ctx->ops.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		goto done;

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx)) {
		/*
		 * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could
		 * be changed from after_wmarks_check() or after_aggregation()
		 * callbacks.  Read the values here, and use those for this
		 * iteration.  That is, the new values that damon_set_attrs()
		 * set are respected from the next iteration.
		 */
		unsigned long next_aggregation_sis = ctx->next_aggregation_sis;
		unsigned long next_ops_update_sis = ctx->next_ops_update_sis;
		unsigned long sample_interval = ctx->attrs.sample_interval;

		if (kdamond_wait_activation(ctx))
			break;

		if (ctx->ops.prepare_access_checks)
			ctx->ops.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			break;

		kdamond_usleep(sample_interval);
		ctx->passed_sample_intervals++;

		if (ctx->ops.check_accesses)
			max_nr_accesses = ctx->ops.check_accesses(ctx);

		if (ctx->passed_sample_intervals == next_aggregation_sis) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				break;
		}

		/*
		 * do kdamond_apply_schemes() after kdamond_merge_regions() if
		 * possible, to reduce overhead
		 */
		if (!list_empty(&ctx->schemes))
			kdamond_apply_schemes(ctx);

		sample_interval = ctx->attrs.sample_interval ?
				ctx->attrs.sample_interval : 1;
		if (ctx->passed_sample_intervals == next_aggregation_sis) {
			ctx->next_aggregation_sis = next_aggregation_sis +
				ctx->attrs.aggr_interval / sample_interval;

			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->ops.reset_aggregated)
				ctx->ops.reset_aggregated(ctx);
		}

		if (ctx->passed_sample_intervals == next_ops_update_sis) {
			ctx->next_ops_update_sis = next_ops_update_sis +
				ctx->attrs.ops_update_interval /
				sample_interval;
			if (ctx->ops.update)
				ctx->ops.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
done:
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate)
		ctx->callback.before_terminate(ctx);
	if (ctx->ops.cleanup)
		ctx->ops.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", current->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	if (!nr_running_ctxs && running_exclusive_ctxs)
		running_exclusive_ctxs = false;
	mutex_unlock(&damon_lock);

	return 0;
}

/*
 * struct damon_system_ram_region - System RAM resource address region of
 *				    [@start, @end).
 * @start:	Start address of the region (inclusive).
 * @end:	End address of the region (exclusive).
 */
struct damon_system_ram_region {
	unsigned long start;
	unsigned long end;
};

static int walk_system_ram(struct resource *res, void *arg)
{
	struct damon_system_ram_region *a = arg;

	if (a->end - a->start < resource_size(res)) {
		a->start = res->start;
		a->end = res->end;
	}
	return 0;
}

/*
 * Find the biggest 'System RAM' resource and store its start and end address
 * in @start and @end, respectively.  If no System RAM is found, returns
 * false.
 */
static bool damon_find_biggest_system_ram(unsigned long *start,
						unsigned long *end)
{
	struct damon_system_ram_region arg = {};

	walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
	if (arg.end <= arg.start)
		return false;

	*start = arg.start;
	*end = arg.end;
	return true;
}

/**
 * damon_set_region_biggest_system_ram_default() - Set the region of the given
 *	monitoring target as requested, or biggest 'System RAM'.
 * @t:		The monitoring target to set the region.
 * @start:	The pointer to the start address of the region.
 * @end:	The pointer to the end address of the region.
 *
 * This function sets the region of @t as requested by @start and @end.  If
 * the values of @start and @end are zero, however, this function finds the
 * biggest 'System RAM' resource and sets the region to cover the resource.
 * In the latter case, this function saves the start and end addresses of the
 * resource in @start and @end, respectively.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_region_biggest_system_ram_default(struct damon_target *t,
			unsigned long *start, unsigned long *end)
{
	struct damon_addr_range addr_range;

	if (*start > *end)
		return -EINVAL;

	if (!*start && !*end &&
	    !damon_find_biggest_system_ram(start, end))
		return -EINVAL;

	addr_range.start = *start;
	addr_range.end = *end;
	return damon_set_regions(t, &addr_range, 1);
}
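/*
 * Example (illustrative sketch only): letting DAMON pick the biggest
 * 'System RAM' resource as the monitoring region of a physical address space
 * target, by passing zeroed start and end values.
 *
 *	unsigned long start = 0, end = 0;
 *
 *	if (!damon_set_region_biggest_system_ram_default(t, &start, &end))
 *		pr_debug("monitoring [%lx, %lx)\n", start, end);
 */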
/*
 * damon_moving_sum() - Calculate an inferred moving sum value.
 * @mvsum:	Inferred sum of the last @len_window values.
 * @nomvsum:	Non-moving sum of the last discrete @len_window window values.
 * @len_window:	The number of last values to take care of.
 * @new_value:	New value that will be added to the pseudo moving sum.
 *
 * Moving sum (moving average * window size) is good for handling noise, but
 * the cost of keeping past values can be high for arbitrary window sizes.
 * This function implements a lightweight pseudo moving sum function that
 * doesn't keep the past window values.
 *
 * It simply assumes there was no noise in the past, and gets the no-noise
 * assumed past value to drop from @nomvsum and @len_window.  @nomvsum is a
 * non-moving sum of the last window.  For example, if @len_window is 10 and
 * we have 25 values, @nomvsum is the sum of the 11th to 20th values of the
 * 25 values.  Hence, this function simply drops @nomvsum / @len_window from
 * the given @mvsum and adds @new_value.
 *
 * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values
 * of the last window could vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20.
 * For calculating the next moving sum with a new value, we should drop 0
 * from 50 and add the new value.  However, this function assumes it got the
 * value 5 for each of the last ten times.  Based on that assumption, when
 * the next value is measured, it drops the assumed past value, 5, from the
 * current sum, and adds the new value to get the updated pseudo-moving
 * average.
 *
 * This means the value could have errors, but the errors will disappear for
 * every @len_window aligned calls.  For example, if @len_window is 10, the
 * pseudo moving sum with the 11th value to the 19th value would have an
 * error.  But the sum with the 20th value will not have the error.
 *
 * Return: Pseudo-moving average after getting the @new_value.
 */
static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
		unsigned int len_window, unsigned int new_value)
{
	return mvsum - nomvsum / len_window + new_value;
}

/**
 * damon_update_region_access_rate() - Update the access rate of a region.
 * @r:		The DAMON region to update for its access check result.
 * @accessed:	Whether the region has been accessed during the last sampling
 *		interval.
 * @attrs:	The damon_attrs of the DAMON context.
 *
 * Update the access rate of a region with the region's last sampling interval
 * access check result.
 *
 * Usually this will be called by &damon_operations->check_accesses callback.
 */
void damon_update_region_access_rate(struct damon_region *r, bool accessed,
		struct damon_attrs *attrs)
{
	unsigned int len_window = 1;

	/*
	 * sample_interval can be zero, but cannot be larger than
	 * aggr_interval, owing to validation of damon_set_attrs().
	 */
	if (attrs->sample_interval)
		len_window = damon_max_nr_accesses(attrs);
	r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp,
			r->last_nr_accesses * 10000, len_window,
			accessed ? 10000 : 0);

	if (accessed)
		r->nr_accesses++;
}

static int __init damon_init(void)
{
	damon_region_cache = KMEM_CACHE(damon_region, 0);
	if (unlikely(!damon_region_cache)) {
		pr_err("creating damon_region_cache fails\n");
		return -ENOMEM;
	}

	return 0;
}

subsys_initcall(damon_init);

#include "core-test.h"