// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/slab.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

/* Get a random number in [l, r) */
#define damon_rand(l, r) (l + prandom_u32_max(r - l))

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	INIT_LIST_HEAD(&region->list);

	return region;
}

/*
 * Add a region between two other regions
 */
inline void damon_insert_region(struct damon_region *r,
		struct damon_region *prev, struct damon_region *next,
		struct damon_target *t)
{
	__list_add(&r->list, &prev->list, &next->list);
	t->nr_regions++;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}

/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_target *damon_new_target(unsigned long id)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->id = id;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);

	return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}

struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	ctx->sample_interval = 5 * 1000;
	ctx->aggr_interval = 100 * 1000;
	ctx->primitive_update_interval = 60 * 1000 * 1000;

	ktime_get_coarse_ts64(&ctx->last_aggregation);
	ctx->last_primitive_update = ctx->last_aggregation;

	mutex_init(&ctx->kdamond_lock);

	ctx->min_nr_regions = 10;
	ctx->max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);

	return ctx;
}

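/*
 * Illustrative sketch (not part of the original file): how the constructors
 * above are typically combined to build a monitoring target that starts with
 * a single region covering [start, end).  The helper name
 * 'example_build_target' is hypothetical; only the damon_* calls refer to
 * functions defined in this file.
 *
 *	static struct damon_target *example_build_target(unsigned long id,
 *			unsigned long start, unsigned long end)
 *	{
 *		struct damon_target *t = damon_new_target(id);
 *		struct damon_region *r;
 *
 *		if (!t)
 *			return NULL;
 *		r = damon_new_region(start, end);
 *		if (!r) {
 *			damon_free_target(t);
 *			return NULL;
 *		}
 *		damon_add_region(r, t);
 *		return t;
 *	}
 */
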
static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->primitive.cleanup) {
		ctx->primitive.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	damon_destroy_targets(ctx);
	kfree(ctx);
}

/**
 * damon_set_targets() - Set monitoring targets.
 * @ctx:	monitoring context
 * @ids:	array of target ids
 * @nr_ids:	number of entries in @ids
 *
 * This function should not be called while the kdamond is running.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_targets(struct damon_ctx *ctx,
		      unsigned long *ids, ssize_t nr_ids)
{
	ssize_t i;
	struct damon_target *t, *next;

	damon_destroy_targets(ctx);

	for (i = 0; i < nr_ids; i++) {
		t = damon_new_target(ids[i]);
		if (!t) {
			pr_err("Failed to alloc damon_target\n");
			/* The caller should do cleanup of the ids itself */
			damon_for_each_target_safe(t, next, ctx)
				damon_destroy_target(t);
			return -ENOMEM;
		}
		damon_add_target(ctx, t);
	}

	return 0;
}

/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @sample_int:		time interval between samplings
 * @aggr_int:		time interval between aggregations
 * @primitive_upd_int:	time interval between monitoring primitive updates
 * @min_nr_reg:		minimal number of regions
 * @max_nr_reg:		maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in micro-seconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
		    unsigned long aggr_int, unsigned long primitive_upd_int,
		    unsigned long min_nr_reg, unsigned long max_nr_reg)
{
	if (min_nr_reg < 3) {
		pr_err("min_nr_regions (%lu) must be at least 3\n",
				min_nr_reg);
		return -EINVAL;
	}
	if (min_nr_reg > max_nr_reg) {
		pr_err("invalid nr_regions. min (%lu) > max (%lu)\n",
				min_nr_reg, max_nr_reg);
		return -EINVAL;
	}

	ctx->sample_interval = sample_int;
	ctx->aggr_interval = aggr_int;
	ctx->primitive_update_interval = primitive_upd_int;
	ctx->min_nr_regions = min_nr_reg;
	ctx->max_nr_regions = max_nr_reg;

	return 0;
}

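/*
 * Illustrative sketch (not part of the original file): configuring a fresh
 * context with damon_set_attrs() and damon_set_targets() before starting it.
 * The interval values and the target id array are arbitrary examples
 * (intervals in microseconds); error handling is simplified.
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *	unsigned long ids[] = { 1234 };		(e.g. a target process id)
 *
 *	if (!ctx)
 *		return -ENOMEM;
 *	if (damon_set_attrs(ctx, 5000, 100000, 60000000, 10, 1000) ||
 *			damon_set_targets(ctx, ids, ARRAY_SIZE(ids))) {
 *		damon_destroy_ctx(ctx);
 *		return -EINVAL;
 *	}
 */
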
/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += r->ar.end - r->ar.start;
	}

	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}

static bool damon_kdamond_running(struct damon_ctx *ctx)
{
	bool running;

	mutex_lock(&ctx->kdamond_lock);
	running = ctx->kdamond != NULL;
	mutex_unlock(&ctx->kdamond_lock);

	return running;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		ctx->kdamond_stop = false;
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}

/**
 * damon_start() - Starts monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per context is created and run in parallel.  The
 * caller should handle synchronization between the threads by itself.  If a
 * group of threads created by another 'damon_start()' call is currently
 * running, this function does nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if (nr_running_ctxs) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	mutex_unlock(&damon_lock);

	return err;
}

/*
 * __damon_stop() - Stops monitoring of given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	mutex_lock(&ctx->kdamond_lock);
	if (ctx->kdamond) {
		ctx->kdamond_stop = true;
		mutex_unlock(&ctx->kdamond_lock);
		while (damon_kdamond_running(ctx))
			usleep_range(ctx->sample_interval,
					ctx->sample_interval * 2);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}

/**
 * damon_stop() - Stops monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			return err;
	}

	return err;
}

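/*
 * Illustrative sketch (not part of the original file): the intended
 * start/stop life cycle for a single context, assuming 'ctx' was prepared as
 * in the configuration sketch above.  damon_start() refuses to run while
 * another group of contexts is being monitored, so -EBUSY is possible.
 *
 *	int err = damon_start(&ctx, 1);
 *
 *	if (err)
 *		return err;
 *	...		(let the kdamond thread sample for a while)
 *	err = damon_stop(&ctx, 1);
 *	damon_destroy_ctx(ctx);
 *	return err;
 */
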
/*
 * damon_check_reset_time_interval() - Check if a time interval has elapsed.
 * @baseline:	the time to check whether the interval has elapsed since
 * @interval:	the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to the current time for the next
 * check.
 *
 * Return:	true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
		unsigned long interval)
{
	struct timespec64 now;

	ktime_get_coarse_ts64(&now);
	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
			interval * 1000)
		return false;
	*baseline = now;
	return true;
}

/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_aggregation,
			ctx->aggr_interval);
}

/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(t, r, damon_nr_regions(t));
			r->nr_accesses = 0;
		}
	}
}

#define sz_damon_region(r) (r->ar.end - r->ar.start)

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}

#define diff_of(a, b) (a > b ? a - b : b - a)

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (prev && prev->ar.end == r->ar.start &&
		    diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and whose
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}

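/*
 * Worked example (not part of the original file) of the weighted average
 * used by damon_merge_two_regions() above: merging a 12 KiB region with
 * nr_accesses == 10 into an adjacent 4 KiB region with nr_accesses == 2
 * yields a 16 KiB region with
 *
 *	nr_accesses = (10 * 12K + 2 * 4K) / (12K + 4K) = 128K / 16K = 8,
 *
 * i.e. the merged counter is weighted toward the larger region, so the merge
 * does not distort the aggregated access frequency.
 */
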
/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
				   struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = r->ar.end - r->ar.start;

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10% and at most 90% of the original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(ctx, t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of regions is equal to or smaller than half of the
 * user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(ctx, t, nr_subregions);

	last_nr_regions = nr_regions;
}

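/*
 * Worked example (not part of the original file) of the split sizing in
 * damon_split_regions_of() above, assuming a 100 KiB region and a 4 KiB
 * DAMON_MIN_REGION: damon_rand(1, 10) picks an integer in [1, 10), so
 * sz_sub becomes ALIGN_DOWN(n * 100 KiB / 10, 4 KiB), i.e. roughly 10% to
 * 90% of the region, rounded down to the minimum region granularity.  When
 * the number of regions stays unchanged over an aggregation interval and is
 * still below a third of max_nr_regions, kdamond_split_regions() splits into
 * three sub-regions instead of two, to probe the middle of large regions.
 */
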
/*
 * Check whether it is time to update the monitoring primitives
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_primitive_update,
			ctx->primitive_update_interval);
}

/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user has requested to stop or
 * all monitoring targets are invalid.
 *
 * Returns true if the current monitoring should be stopped.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;
	bool stop;

	mutex_lock(&ctx->kdamond_lock);
	stop = ctx->kdamond_stop;
	mutex_unlock(&ctx->kdamond_lock);
	if (stop)
		return true;

	if (!ctx->primitive.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->primitive.target_valid(t))
			return false;
	}

	return true;
}

static void set_kdamond_stop(struct damon_ctx *ctx)
{
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond_stop = true;
	mutex_unlock(&ctx->kdamond_lock);
}

/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = (struct damon_ctx *)data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;

	mutex_lock(&ctx->kdamond_lock);
	pr_info("kdamond (%d) starts\n", ctx->kdamond->pid);
	mutex_unlock(&ctx->kdamond_lock);

	if (ctx->primitive.init)
		ctx->primitive.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		set_kdamond_stop(ctx);

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx)) {
		if (ctx->primitive.prepare_access_checks)
			ctx->primitive.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			set_kdamond_stop(ctx);

		usleep_range(ctx->sample_interval, ctx->sample_interval + 1);

		if (ctx->primitive.check_accesses)
			max_nr_accesses = ctx->primitive.check_accesses(ctx);

		if (kdamond_aggregate_interval_passed(ctx)) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				set_kdamond_stop(ctx);
			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->primitive.reset_aggregated)
				ctx->primitive.reset_aggregated(ctx);
		}

		if (kdamond_need_update_primitive(ctx)) {
			if (ctx->primitive.update)
				ctx->primitive.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate &&
			ctx->callback.before_terminate(ctx))
		set_kdamond_stop(ctx);
	if (ctx->primitive.cleanup)
		ctx->primitive.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	mutex_unlock(&damon_lock);

	do_exit(0);
}

#include "core-test.h"