1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * DAMON api 4 * 5 * Author: SeongJae Park <sj@kernel.org> 6 */ 7 8 #ifndef _DAMON_H_ 9 #define _DAMON_H_ 10 11 #include <linux/math64.h> 12 #include <linux/memcontrol.h> 13 #include <linux/mutex.h> 14 #include <linux/prandom.h> 15 #include <linux/time64.h> 16 #include <linux/types.h> 17 18 /* Minimal region size. Every damon_region is aligned by this. */ 19 #define DAMON_MIN_REGION_SZ PAGE_SIZE 20 /* Maximum number of monitoring probes. */ 21 #define DAMON_MAX_PROBES (4) 22 /* Max priority score for DAMON-based operation schemes */ 23 #define DAMOS_MAX_SCORE (99) 24 25 /** 26 * struct damon_addr_range - Represents an address region of [@start, @end). 27 * @start: Start address of the region (inclusive). 28 * @end: End address of the region (exclusive). 29 */ 30 struct damon_addr_range { 31 unsigned long start; 32 unsigned long end; 33 }; 34 35 /** 36 * struct damon_size_range - Represents size for filter to operate on [@min, @max]. 37 * @min: Min size (inclusive). 38 * @max: Max size (inclusive). 39 */ 40 struct damon_size_range { 41 unsigned long min; 42 unsigned long max; 43 }; 44 45 /** 46 * struct damon_region - Represents a monitoring target region. 47 * @ar: The address range of the region. 48 * @sampling_addr: Address of the sample for the next access check. 49 * @nr_accesses: Access frequency of this region. 50 * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for 51 * each sampling interval. 52 * @probe_hits: Number of probe-positive region samples. 53 * @list: List head for siblings. 54 * @age: Age of this region. 55 * 56 * For any use case, @ar should be non-zero positive size. 57 * 58 * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be 59 * increased for every &damon_attrs->sample_interval if an access to the region 60 * during the last sampling interval is found. The update of this field should 61 * not be done with direct access but with the helper function, 62 * damon_update_region_access_rate(). 63 * 64 * @nr_accesses_bp is another representation of @nr_accesses in basis point 65 * (1 in 10,000) that updated for every &damon_attrs->sample_interval in a 66 * manner similar to moving sum. By the algorithm, this value becomes 67 * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can 68 * be used when the aggregation interval is too huge and therefore cannot wait 69 * for it before getting the access monitoring results. 70 * 71 * @age is initially zero, increased for each aggregation interval, and reset 72 * to zero again if the access frequency is significantly changed. If two 73 * regions are merged into a new region, both @nr_accesses and @age of the new 74 * region are set as region size-weighted average of those of the two regions. 75 */ 76 struct damon_region { 77 struct damon_addr_range ar; 78 unsigned long sampling_addr; 79 unsigned int nr_accesses; 80 unsigned int nr_accesses_bp; 81 unsigned char probe_hits[DAMON_MAX_PROBES]; 82 struct list_head list; 83 84 unsigned int age; 85 /* private: Internal value for age calculation. */ 86 unsigned int last_nr_accesses; 87 }; 88 89 /** 90 * struct damon_target - Represents a monitoring target. 91 * @pid: The PID of the virtual address space to monitor. 92 * @nr_regions: Number of monitoring target regions of this target. 93 * @regions_list: Head of the monitoring target regions of this target. 94 * @list: List head for siblings. 95 * @obsolete: Whether the commit destination target is obsolete. 96 * 97 * Each monitoring context could have multiple targets. For example, a context 98 * for virtual memory address spaces could have multiple target processes. The 99 * @pid should be set for appropriate &struct damon_operations including the 100 * virtual address spaces monitoring operations. 101 * 102 * @obsolete is used only for damon_commit_targets() source targets, to specify 103 * the matching destination targets are obsolete. Read damon_commit_targets() 104 * to see how it is handled. 105 */ 106 struct damon_target { 107 struct pid *pid; 108 unsigned int nr_regions; 109 struct list_head regions_list; 110 struct list_head list; 111 bool obsolete; 112 }; 113 114 /** 115 * enum damos_action - Represents an action of a Data Access Monitoring-based 116 * Operation Scheme. 117 * 118 * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. 119 * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. 120 * @DAMOS_PAGEOUT: Reclaim the region. 121 * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. 122 * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. 123 * @DAMOS_COLLAPSE: Call ``madvise()`` for the region with MADV_COLLAPSE. 124 * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. 125 * @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists. 126 * @DAMOS_MIGRATE_HOT: Migrate the regions prioritizing warmer regions. 127 * @DAMOS_MIGRATE_COLD: Migrate the regions prioritizing colder regions. 128 * @DAMOS_STAT: Do nothing but count the stat. 129 * @NR_DAMOS_ACTIONS: Total number of DAMOS actions 130 * 131 * The support of each action is up to running &struct damon_operations. 132 * Refer to 'Operation Action' section of Documentation/mm/damon/design.rst for 133 * status of the supports. 134 * 135 * Note that DAMOS_PAGEOUT doesn't trigger demotions. 136 */ 137 enum damos_action { 138 DAMOS_WILLNEED, 139 DAMOS_COLD, 140 DAMOS_PAGEOUT, 141 DAMOS_HUGEPAGE, 142 DAMOS_NOHUGEPAGE, 143 DAMOS_COLLAPSE, 144 DAMOS_LRU_PRIO, 145 DAMOS_LRU_DEPRIO, 146 DAMOS_MIGRATE_HOT, 147 DAMOS_MIGRATE_COLD, 148 DAMOS_STAT, /* Do nothing but only record the stat */ 149 NR_DAMOS_ACTIONS, 150 }; 151 152 /** 153 * enum damos_quota_goal_metric - Represents the metric to be used as the goal 154 * 155 * @DAMOS_QUOTA_USER_INPUT: User-input value. 156 * @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us. 157 * @DAMOS_QUOTA_NODE_MEM_USED_BP: MemUsed ratio of a node. 158 * @DAMOS_QUOTA_NODE_MEM_FREE_BP: MemFree ratio of a node. 159 * @DAMOS_QUOTA_NODE_MEMCG_USED_BP: MemUsed ratio of a node for a cgroup. 160 * @DAMOS_QUOTA_NODE_MEMCG_FREE_BP: MemFree ratio of a node for a cgroup. 161 * @DAMOS_QUOTA_ACTIVE_MEM_BP: Active to total LRU memory ratio. 162 * @DAMOS_QUOTA_INACTIVE_MEM_BP: Inactive to total LRU memory ratio. 163 * @DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP: Scheme-eligible memory ratio of a 164 * node in basis points (0-10000). 165 * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics. 166 * 167 * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported. 168 */ 169 enum damos_quota_goal_metric { 170 DAMOS_QUOTA_USER_INPUT, 171 DAMOS_QUOTA_SOME_MEM_PSI_US, 172 DAMOS_QUOTA_NODE_MEM_USED_BP, 173 DAMOS_QUOTA_NODE_MEM_FREE_BP, 174 DAMOS_QUOTA_NODE_MEMCG_USED_BP, 175 DAMOS_QUOTA_NODE_MEMCG_FREE_BP, 176 DAMOS_QUOTA_ACTIVE_MEM_BP, 177 DAMOS_QUOTA_INACTIVE_MEM_BP, 178 DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP, 179 NR_DAMOS_QUOTA_GOAL_METRICS, 180 }; 181 182 /** 183 * struct damos_quota_goal - DAMOS scheme quota auto-tuning goal. 184 * @metric: Metric to be used for representing the goal. 185 * @target_value: Target value of @metric to achieve with the tuning. 186 * @current_value: Current value of @metric. 187 * @last_psi_total: Last measured total PSI 188 * @nid: Node id. 189 * @memcg_id: Memcg id. 190 * @list: List head for siblings. 191 * 192 * Data structure for getting the current score of the quota tuning goal. The 193 * score is calculated by how close @current_value and @target_value are. Then 194 * the score is entered to DAMON's internal feedback loop mechanism to get the 195 * auto-tuned quota. 196 * 197 * If @metric is DAMOS_QUOTA_USER_INPUT, @current_value should be manually 198 * entered by the user, probably inside the kdamond callbacks. Otherwise, 199 * DAMON sets @current_value with self-measured value of @metric. 200 * 201 * If @metric is DAMOS_QUOTA_NODE_MEM_{USED,FREE}_BP, @nid represents the node 202 * id of the target node to account the used/free memory. 203 * 204 * If @metric is DAMOS_QUOTA_NODE_MEMCG_{USED,FREE}_BP, @nid and @memcg_id 205 * represents the node id and the cgroup to account the used memory for. 206 */ 207 struct damos_quota_goal { 208 enum damos_quota_goal_metric metric; 209 unsigned long target_value; 210 unsigned long current_value; 211 /* metric-dependent fields */ 212 union { 213 u64 last_psi_total; 214 struct { 215 int nid; 216 u64 memcg_id; 217 }; 218 }; 219 struct list_head list; 220 }; 221 222 /** 223 * enum damos_quota_goal_tuner - Goal-based quota tuning logic. 224 * @DAMOS_QUOTA_GOAL_TUNER_CONSIST: Aim long term consistent quota. 225 * @DAMOS_QUOTA_GOAL_TUNER_TEMPORAL: Aim zero quota asap. 226 */ 227 enum damos_quota_goal_tuner { 228 DAMOS_QUOTA_GOAL_TUNER_CONSIST, 229 DAMOS_QUOTA_GOAL_TUNER_TEMPORAL, 230 }; 231 232 /** 233 * struct damos_quota - Controls the aggressiveness of the given scheme. 234 * @reset_interval: Charge reset interval in milliseconds. 235 * @ms: Maximum milliseconds that the scheme can use. 236 * @sz: Maximum bytes of memory that the action can be applied. 237 * @goals: Head of quota tuning goals (&damos_quota_goal) list. 238 * @goal_tuner: Goal-based @esz tuning algorithm to use. 239 * @esz: Effective size quota in bytes. 240 * @fail_charge_num: Failed regions charge rate numerator. 241 * @fail_charge_denom: Failed regions charge rate denominator. 242 * 243 * @weight_sz: Weight of the region's size for prioritization. 244 * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. 245 * @weight_age: Weight of the region's age for prioritization. 246 * 247 * To avoid consuming too much CPU time or IO resources for applying the 248 * &struct damos->action to large memory, DAMON allows users to set time and/or 249 * size quotas. The quotas can be set by writing non-zero values to &ms and 250 * &sz, respectively. If the time quota is set, DAMON tries to use only up to 251 * &ms milliseconds within &reset_interval for applying the action. If the 252 * size quota is set, DAMON tries to apply the action only up to &sz bytes 253 * within &reset_interval. 254 * 255 * To convince the different types of quotas and goals, DAMON internally 256 * converts those into one single size quota called "effective quota". DAMON 257 * internally uses it as the only one real quota. The conversion is made as 258 * follows. 259 * 260 * The time quota is transformed to a size quota using estimated throughput of 261 * the scheme's action. DAMON then compares it against &sz and uses smaller 262 * one as the effective quota. 263 * 264 * If @goals is not empty, DAMON calculates yet another size quota based on the 265 * goals using its internal feedback loop algorithm, for every @reset_interval. 266 * Then, if the new size quota is smaller than the effective quota, it uses the 267 * new size quota as the effective quota. 268 * 269 * The resulting effective size quota in bytes is set to @esz. 270 * 271 * For DAMOS action applying failed amount of regions, charging those same to 272 * those that the action has successfully applied may be unfair. For the 273 * reason, 'the size * @fail_charge_num / @fail_charge_denom' is charged. 274 * 275 * For selecting regions within the quota, DAMON prioritizes current scheme's 276 * target memory regions using the &struct damon_operations->get_scheme_score. 277 * You could customize the prioritization logic by setting &weight_sz, 278 * &weight_nr_accesses, and &weight_age, because monitoring operations are 279 * encouraged to respect those. 280 */ 281 struct damos_quota { 282 unsigned long reset_interval; 283 unsigned long ms; 284 unsigned long sz; 285 struct list_head goals; 286 enum damos_quota_goal_tuner goal_tuner; 287 unsigned long esz; 288 289 unsigned int fail_charge_num; 290 unsigned int fail_charge_denom; 291 292 unsigned int weight_sz; 293 unsigned int weight_nr_accesses; 294 unsigned int weight_age; 295 296 /* private: */ 297 /* For throughput estimation */ 298 unsigned long total_charged_sz; 299 unsigned long total_charged_ns; 300 301 /* For charging the quota */ 302 unsigned long charged_sz; 303 unsigned long charged_from; 304 struct damon_target *charge_target_from; 305 unsigned long charge_addr_from; 306 307 /* For prioritization */ 308 unsigned int min_score; 309 310 /* For feedback loop */ 311 unsigned long esz_bp; 312 }; 313 314 /** 315 * enum damos_wmark_metric - Represents the watermark metric. 316 * 317 * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme. 318 * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]. 319 * @NR_DAMOS_WMARK_METRICS: Total number of DAMOS watermark metrics 320 */ 321 enum damos_wmark_metric { 322 DAMOS_WMARK_NONE, 323 DAMOS_WMARK_FREE_MEM_RATE, 324 NR_DAMOS_WMARK_METRICS, 325 }; 326 327 /** 328 * struct damos_watermarks - Controls when a given scheme should be activated. 329 * @metric: Metric for the watermarks. 330 * @interval: Watermarks check time interval in microseconds. 331 * @high: High watermark. 332 * @mid: Middle watermark. 333 * @low: Low watermark. 334 * 335 * If &metric is &DAMOS_WMARK_NONE, the scheme is always active. Being active 336 * means DAMON does monitoring and applying the action of the scheme to 337 * appropriate memory regions. Else, DAMON checks &metric of the system for at 338 * least every &interval microseconds and works as below. 339 * 340 * If &metric is higher than &high, the scheme is inactivated. If &metric is 341 * between &mid and &low, the scheme is activated. If &metric is lower than 342 * &low, the scheme is inactivated. 343 */ 344 struct damos_watermarks { 345 enum damos_wmark_metric metric; 346 unsigned long interval; 347 unsigned long high; 348 unsigned long mid; 349 unsigned long low; 350 351 /* private: */ 352 bool activated; 353 }; 354 355 /** 356 * struct damos_stat - Statistics on a given scheme. 357 * @nr_tried: Total number of regions that the scheme is tried to be applied. 358 * @sz_tried: Total size of regions that the scheme is tried to be applied. 359 * @nr_applied: Total number of regions that the scheme is applied. 360 * @sz_applied: Total size of regions that the scheme is applied. 361 * @sz_ops_filter_passed: 362 * Total bytes that passed ops layer-handled DAMOS filters. 363 * @qt_exceeds: Total number of times the quota of the scheme has exceeded. 364 * @nr_snapshots: 365 * Total number of DAMON snapshots that the scheme has tried. 366 * 367 * "Tried an action to a region" in this context means the DAMOS core logic 368 * determined the region as eligible to apply the action. The access pattern 369 * (&struct damos_access_pattern), quotas (&struct damos_quota), watermarks 370 * (&struct damos_watermarks) and filters (&struct damos_filter) that handled 371 * on core logic can affect this. The core logic asks the operation set 372 * (&struct damon_operations) to apply the action to the region. 373 * 374 * "Applied an action to a region" in this context means the operation set 375 * (&struct damon_operations) successfully applied the action to the region, at 376 * least to a part of the region. The filters (&struct damos_filter) that 377 * handled on operation set layer and type of the action and pages of the 378 * region can affect this. For example, if a filter is set to exclude 379 * anonymous pages and the region has only anonymous pages, the region will be 380 * failed at applying the action. If the action is &DAMOS_PAGEOUT and all 381 * pages of the region are already paged out, the region will be failed at 382 * applying the action. 383 */ 384 struct damos_stat { 385 unsigned long nr_tried; 386 unsigned long sz_tried; 387 unsigned long nr_applied; 388 unsigned long sz_applied; 389 unsigned long sz_ops_filter_passed; 390 unsigned long qt_exceeds; 391 unsigned long nr_snapshots; 392 }; 393 394 /** 395 * enum damos_filter_type - Type of memory for &struct damos_filter 396 * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. 397 * @DAMOS_FILTER_TYPE_ACTIVE: Active pages. 398 * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. 399 * @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages. 400 * @DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: Page is part of a hugepage. 401 * @DAMOS_FILTER_TYPE_UNMAPPED: Unmapped pages. 402 * @DAMOS_FILTER_TYPE_ADDR: Address range. 403 * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target. 404 * @NR_DAMOS_FILTER_TYPES: Number of filter types. 405 * 406 * The anon pages type and memcg type filters are handled by underlying 407 * &struct damon_operations as a part of scheme action trying, and therefore 408 * accounted as 'tried'. In contrast, other types are handled by core layer 409 * before trying of the action and therefore not accounted as 'tried'. 410 * 411 * The support of the filters that handled by &struct damon_operations depend 412 * on the running &struct damon_operations. 413 * &enum DAMON_OPS_PADDR supports both anon pages type and memcg type filters, 414 * while &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR don't support any of 415 * the two types. 416 */ 417 enum damos_filter_type { 418 DAMOS_FILTER_TYPE_ANON, 419 DAMOS_FILTER_TYPE_ACTIVE, 420 DAMOS_FILTER_TYPE_MEMCG, 421 DAMOS_FILTER_TYPE_YOUNG, 422 DAMOS_FILTER_TYPE_HUGEPAGE_SIZE, 423 DAMOS_FILTER_TYPE_UNMAPPED, 424 DAMOS_FILTER_TYPE_ADDR, 425 DAMOS_FILTER_TYPE_TARGET, 426 NR_DAMOS_FILTER_TYPES, 427 }; 428 429 /** 430 * struct damos_filter - DAMOS action target memory filter. 431 * @type: Type of the target memory. 432 * @matching: Whether this is for @type-matching memory. 433 * @allow: Whether to include or exclude the @matching memory. 434 * @memcg_id: Memcg id of the question if @type is DAMOS_FILTER_MEMCG. 435 * @addr_range: Address range if @type is DAMOS_FILTER_TYPE_ADDR. 436 * @target_idx: Index of the &struct damon_target of 437 * &damon_ctx->adaptive_targets if @type is 438 * DAMOS_FILTER_TYPE_TARGET. 439 * @sz_range: Size range if @type is DAMOS_FILTER_TYPE_HUGEPAGE_SIZE. 440 * @list: List head for siblings. 441 * 442 * Before applying the &damos->action to a memory region, DAMOS checks if each 443 * byte of the region matches to this given condition and avoid applying the 444 * action if so. Support of each filter type depends on the running &struct 445 * damon_operations and the type. Refer to &enum damos_filter_type for more 446 * details. 447 */ 448 struct damos_filter { 449 enum damos_filter_type type; 450 bool matching; 451 bool allow; 452 union { 453 u64 memcg_id; 454 struct damon_addr_range addr_range; 455 int target_idx; 456 struct damon_size_range sz_range; 457 }; 458 struct list_head list; 459 }; 460 461 struct damon_ctx; 462 struct damos; 463 464 /** 465 * struct damos_walk_control - Control damos_walk(). 466 * 467 * @walk_fn: Function to be called back for each region. 468 * @data: Data that will be passed to walk functions. 469 * 470 * Control damos_walk(), which requests specific kdamond to invoke the given 471 * function to each region that eligible to apply actions of the kdamond's 472 * schemes. Refer to damos_walk() for more details. 473 */ 474 struct damos_walk_control { 475 void (*walk_fn)(void *data, struct damon_ctx *ctx, 476 struct damon_target *t, struct damon_region *r, 477 struct damos *s, unsigned long sz_filter_passed); 478 void *data; 479 /* private: internal use only */ 480 /* informs if the kdamond finished handling of the walk request */ 481 struct completion completion; 482 /* informs if the walk is canceled. */ 483 bool canceled; 484 }; 485 486 /** 487 * struct damos_access_pattern - Target access pattern of the given scheme. 488 * @min_sz_region: Minimum size of target regions. 489 * @max_sz_region: Maximum size of target regions. 490 * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. 491 * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. 492 * @min_age_region: Minimum age of target regions. 493 * @max_age_region: Maximum age of target regions. 494 */ 495 struct damos_access_pattern { 496 unsigned long min_sz_region; 497 unsigned long max_sz_region; 498 unsigned int min_nr_accesses; 499 unsigned int max_nr_accesses; 500 unsigned int min_age_region; 501 unsigned int max_age_region; 502 }; 503 504 /** 505 * struct damos_migrate_dests - Migration destination nodes and their weights. 506 * @node_id_arr: Array of migration destination node ids. 507 * @weight_arr: Array of migration weights for @node_id_arr. 508 * @nr_dests: Length of the @node_id_arr and @weight_arr arrays. 509 * 510 * @node_id_arr is an array of the ids of migration destination nodes. 511 * @weight_arr is an array of the weights for those. The weights in 512 * @weight_arr are for nodes in @node_id_arr of same array index. 513 */ 514 struct damos_migrate_dests { 515 unsigned int *node_id_arr; 516 unsigned int *weight_arr; 517 size_t nr_dests; 518 }; 519 520 /** 521 * struct damos - Represents a Data Access Monitoring-based Operation Scheme. 522 * @pattern: Access pattern of target regions. 523 * @action: &damos_action to be applied to the target regions. 524 * @apply_interval_us: The time between applying the @action. 525 * @quota: Control the aggressiveness of this scheme. 526 * @wmarks: Watermarks for automated (in)activation of this scheme. 527 * @migrate_dests: Destination nodes if @action is "migrate_{hot,cold}". 528 * @target_nid: Destination node if @action is "migrate_{hot,cold}". 529 * @core_filters: Additional set of &struct damos_filter for &action. 530 * @ops_filters: ops layer handling &struct damos_filter objects list. 531 * @last_applied: Last @action applied ops-managing entity. 532 * @stat: Statistics of this scheme. 533 * @max_nr_snapshots: Upper limit of nr_snapshots stat. 534 * @list: List head for siblings. 535 * 536 * For each @apply_interval_us, DAMON finds regions which fit in the 537 * &pattern and applies &action to those. To avoid consuming too much 538 * CPU time or IO resources for the &action, "a is used. 539 * 540 * If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead. 541 * 542 * To do the work only when needed, schemes can be activated for specific 543 * system situations using &wmarks. If all schemes that registered to the 544 * monitoring context are inactive, DAMON stops monitoring either, and just 545 * repeatedly checks the watermarks. 546 * 547 * @migrate_dests specifies multiple migration target nodes with different 548 * weights for migrate_hot or migrate_cold actions. @target_nid is ignored if 549 * this is set. 550 * 551 * @target_nid is used to set the migration target node for migrate_hot or 552 * migrate_cold actions, and @migrate_dests is unset. 553 * 554 * Before applying the &action to a memory region, &struct damon_operations 555 * implementation could check pages of the region and skip &action to respect 556 * &core_filters 557 * 558 * The minimum entity that @action can be applied depends on the underlying 559 * &struct damon_operations. Since it may not be aligned with the core layer 560 * abstract, namely &struct damon_region, &struct damon_operations could apply 561 * @action to same entity multiple times. Large folios that underlying on 562 * multiple &struct damon region objects could be such examples. The &struct 563 * damon_operations can use @last_applied to avoid that. DAMOS core logic 564 * unsets @last_applied when each regions walking for applying the scheme is 565 * finished. 566 * 567 * After applying the &action to each region, &stat is updated. 568 * 569 * If &max_nr_snapshots is set as non-zero and &stat.nr_snapshots be same to or 570 * greater than it, the scheme is deactivated. 571 */ 572 struct damos { 573 struct damos_access_pattern pattern; 574 enum damos_action action; 575 unsigned long apply_interval_us; 576 /* private: internal use only */ 577 /* 578 * number of sample intervals that should be passed before applying 579 * @action 580 */ 581 unsigned long next_apply_sis; 582 /* informs if ongoing DAMOS walk for this scheme is finished */ 583 bool walk_completed; 584 /* 585 * If the current region in the filtering stage is allowed by core 586 * layer-handled filters. If true, operations layer allows it, too. 587 */ 588 bool core_filters_allowed; 589 /* whether to reject core/ops filters umatched regions */ 590 bool core_filters_default_reject; 591 bool ops_filters_default_reject; 592 /* public: */ 593 struct damos_quota quota; 594 struct damos_watermarks wmarks; 595 union { 596 struct { 597 int target_nid; 598 struct damos_migrate_dests migrate_dests; 599 }; 600 }; 601 struct list_head core_filters; 602 struct list_head ops_filters; 603 void *last_applied; 604 struct damos_stat stat; 605 unsigned long max_nr_snapshots; 606 struct list_head list; 607 }; 608 609 /** 610 * enum damon_ops_id - Identifier for each monitoring operations implementation 611 * 612 * @DAMON_OPS_VADDR: Monitoring operations for virtual address spaces 613 * @DAMON_OPS_FVADDR: Monitoring operations for only fixed ranges of virtual 614 * address spaces 615 * @DAMON_OPS_PADDR: Monitoring operations for the physical address space 616 * @NR_DAMON_OPS: Number of monitoring operations implementations 617 */ 618 enum damon_ops_id { 619 DAMON_OPS_VADDR, 620 DAMON_OPS_FVADDR, 621 DAMON_OPS_PADDR, 622 NR_DAMON_OPS, 623 }; 624 625 /** 626 * struct damon_operations - Monitoring operations for given use cases. 627 * 628 * @id: Identifier of this operations set. 629 * @init: Initialize operations-related data structures. 630 * @update: Update operations-related data structures. 631 * @prepare_access_checks: Prepare next access check of target regions. 632 * @check_accesses: Check the accesses to target regions. 633 * @apply_probes: Apply probes for each region. 634 * @get_scheme_score: Get the score of a region for a scheme. 635 * @apply_scheme: Apply a DAMON-based operation scheme. 636 * @target_valid: Determine if the target is valid. 637 * @cleanup_target: Clean up each target before deallocation. 638 * 639 * DAMON can be extended for various address spaces and usages. For this, 640 * users should register the low level operations for their target address 641 * space and usecase via the &damon_ctx.ops. Then, the monitoring thread 642 * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting 643 * the monitoring, @update after each &damon_attrs.ops_update_interval, and 644 * @check_accesses, @target_valid and @prepare_access_checks after each 645 * &damon_attrs.sample_interval. 646 * 647 * Each &struct damon_operations instance having valid @id can be registered 648 * via damon_register_ops() and selected by damon_select_ops() later. 649 * @init should initialize operations-related data structures. For example, 650 * this could be used to construct proper monitoring target regions and link 651 * those to @damon_ctx.adaptive_targets. 652 * @update should update the operations-related data structures. For example, 653 * this could be used to update monitoring target regions for current status. 654 * @prepare_access_checks should manipulate the monitoring regions to be 655 * prepared for the next access check. 656 * @check_accesses should check the accesses to each region that made after the 657 * last preparation and update the number of observed accesses of each region. 658 * It should also return max number of observed accesses that made as a result 659 * of its update. The value will be used for regions adjustment threshold. 660 * @apply_probes should apply the data attribute probes to each region and 661 * accordingly update the probe hits counter of the region. 662 * @get_scheme_score should return the priority score of a region for a scheme 663 * as an integer in [0, &DAMOS_MAX_SCORE]. 664 * @apply_scheme is called from @kdamond when a region for user provided 665 * DAMON-based operation scheme is found. It should apply the scheme's action 666 * to the region and return bytes of the region that the action is successfully 667 * applied. It should also report how many bytes of the region has passed 668 * filters (&struct damos_filter) that handled by itself. 669 * @target_valid should check whether the target is still valid for the 670 * monitoring. 671 * @cleanup_target is called before the target will be deallocated. 672 */ 673 struct damon_operations { 674 enum damon_ops_id id; 675 void (*init)(struct damon_ctx *context); 676 void (*update)(struct damon_ctx *context); 677 void (*prepare_access_checks)(struct damon_ctx *context); 678 unsigned int (*check_accesses)(struct damon_ctx *context); 679 void (*apply_probes)(struct damon_ctx *context); 680 int (*get_scheme_score)(struct damon_ctx *context, 681 struct damon_region *r, struct damos *scheme); 682 unsigned long (*apply_scheme)(struct damon_ctx *context, 683 struct damon_target *t, struct damon_region *r, 684 struct damos *scheme, unsigned long *sz_filter_passed); 685 bool (*target_valid)(struct damon_target *t); 686 void (*cleanup_target)(struct damon_target *t); 687 }; 688 689 /* 690 * struct damon_call_control - Control damon_call(). 691 * 692 * @fn: Function to be called back. 693 * @data: Data that will be passed to @fn. 694 * @repeat: Repeat invocations. 695 * @return_code: Return code from @fn invocation. 696 * @dealloc_on_cancel: If @repeat is true, de-allocate when canceled. 697 * 698 * Control damon_call(), which requests specific kdamond to invoke a given 699 * function. Refer to damon_call() for more details. 700 */ 701 struct damon_call_control { 702 int (*fn)(void *data); 703 void *data; 704 bool repeat; 705 int return_code; 706 bool dealloc_on_cancel; 707 /* private: internal use only */ 708 /* informs if the kdamond finished handling of the request */ 709 struct completion completion; 710 /* informs if the kdamond canceled @fn infocation */ 711 bool canceled; 712 /* List head for siblings. */ 713 struct list_head list; 714 }; 715 716 /** 717 * struct damon_intervals_goal - Monitoring intervals auto-tuning goal. 718 * 719 * @access_bp: Access events observation ratio to achieve in bp. 720 * @aggrs: Number of aggregations to achieve @access_bp within. 721 * @min_sample_us: Minimum resulting sampling interval in microseconds. 722 * @max_sample_us: Maximum resulting sampling interval in microseconds. 723 * 724 * DAMON automatically tunes &damon_attrs->sample_interval and 725 * &damon_attrs->aggr_interval aiming the ratio in bp (1/10,000) of 726 * DAMON-observed access events to theoretical maximum amount within @aggrs 727 * aggregations be same to @access_bp. The logic increases 728 * &damon_attrs->aggr_interval and &damon_attrs->sampling_interval in same 729 * ratio if the current access events observation ratio is lower than the 730 * target for each @aggrs aggregations, and vice versa. 731 * 732 * If @aggrs is zero, the tuning is disabled and hence this struct is ignored. 733 */ 734 struct damon_intervals_goal { 735 unsigned long access_bp; 736 unsigned long aggrs; 737 unsigned long min_sample_us; 738 unsigned long max_sample_us; 739 }; 740 741 /** 742 * enum damon_filter_type - Type of &struct damon_filter 743 * 744 * @DAMON_FILTER_TYPE_ANON: Anonymous pages. 745 * @DAMON_FILTER_TYPE_MEMCG: Specific memcg's pages. 746 */ 747 enum damon_filter_type { 748 DAMON_FILTER_TYPE_ANON, 749 DAMON_FILTER_TYPE_MEMCG, 750 }; 751 752 /** 753 * struct damon_filter - DAMON region filter for &struct damon_probe. 754 * 755 * @type: Type of the region. 756 * @matching: Whether this filter is for the type-matching ones. 757 * @allow: Whether the @type-@matching ones should pass this filter. 758 * @memcg_id: Memcg id of the question if @type is DAMON_FILTER_MEMCG. 759 * @list: Siblings list. 760 */ 761 struct damon_filter { 762 enum damon_filter_type type; 763 bool matching; 764 bool allow; 765 union { 766 u64 memcg_id; 767 }; 768 struct list_head list; 769 }; 770 771 /** 772 * struct damon_probe - Data region attribute probe. 773 * 774 * @filters: Filters for assessing if a given region is for this probe. 775 * @list: Siblings list. 776 */ 777 struct damon_probe { 778 struct list_head filters; 779 struct list_head list; 780 }; 781 782 /** 783 * struct damon_attrs - Monitoring attributes for accuracy/overhead control. 784 * 785 * @sample_interval: The time between access samplings. 786 * @aggr_interval: The time between monitor results aggregations. 787 * @ops_update_interval: The time between monitoring operations updates. 788 * @intervals_goal: Intervals auto-tuning goal. 789 * @min_nr_regions: The minimum number of adaptive monitoring 790 * regions. 791 * @max_nr_regions: The maximum number of adaptive monitoring 792 * regions. 793 * 794 * For each @sample_interval, DAMON checks whether each region is accessed or 795 * not during the last @sample_interval. If such access is found, DAMON 796 * aggregates the information by increasing &damon_region->nr_accesses for 797 * @aggr_interval time. For each @aggr_interval, the count is reset. DAMON 798 * also checks whether the target memory regions need update (e.g., by 799 * ``mmap()`` calls from the application, in case of virtual memory monitoring) 800 * and applies the changes for each @ops_update_interval. All time intervals 801 * are in micro-seconds. Please refer to &struct damon_operations and &struct 802 * damon_call_control for more detail. 803 */ 804 struct damon_attrs { 805 unsigned long sample_interval; 806 unsigned long aggr_interval; 807 unsigned long ops_update_interval; 808 struct damon_intervals_goal intervals_goal; 809 unsigned long min_nr_regions; 810 unsigned long max_nr_regions; 811 /* private: internal use only */ 812 /* 813 * @aggr_interval to @sample_interval ratio. 814 * Core-external components call damon_set_attrs() with &damon_attrs 815 * that this field is unset. In the case, damon_set_attrs() sets this 816 * field of resulting &damon_attrs. Core-internal components such as 817 * kdamond_tune_intervals() calls damon_set_attrs() with &damon_attrs 818 * that this field is set. In the case, damon_set_attrs() just keep 819 * it. 820 */ 821 unsigned long aggr_samples; 822 }; 823 824 /** 825 * struct damon_ctx - Represents a context for each monitoring. This is the 826 * main interface that allows users to set the attributes and get the results 827 * of the monitoring. 828 * 829 * @attrs: Monitoring attributes for accuracy/overhead control. 830 * 831 * For each monitoring context, one kernel thread for the monitoring, namely 832 * kdamond, is created. The pid of kdamond can be retrieved using 833 * damon_kdamond_pid(). 834 * 835 * Once started, kdamond runs until explicitly required to be terminated or 836 * every monitoring target is invalid. The validity of the targets is checked 837 * via the &damon_operations.target_valid of @ops. The termination can also be 838 * explicitly requested by calling damon_stop(). To know if a kdamond is 839 * running, damon_is_running() can be used. 840 * 841 * While the kdamond is running, all accesses to &struct damon_ctx from a 842 * thread other than the kdamond should be made using safe DAMON APIs, 843 * including damon_call() and damos_walk(). 844 * 845 * @ops: Set of monitoring operations for given use cases. 846 * @addr_unit: Scale factor for core to ops address conversion. 847 * @min_region_sz: Minimum region size. 848 * @pause: Pause kdamond main loop. 849 * @adaptive_targets: Head of monitoring targets (&damon_target) list. 850 * @schemes: Head of schemes (&damos) list. 851 */ 852 struct damon_ctx { 853 struct damon_attrs attrs; 854 855 /* private: internal use only */ 856 /* number of sample intervals that passed since this context started */ 857 unsigned long passed_sample_intervals; 858 /* 859 * number of sample intervals that should be passed before next 860 * aggregation 861 */ 862 unsigned long next_aggregation_sis; 863 /* 864 * number of sample intervals that should be passed before next ops 865 * update 866 */ 867 unsigned long next_ops_update_sis; 868 /* 869 * number of sample intervals that should be passed before next 870 * intervals tuning 871 */ 872 unsigned long next_intervals_tune_sis; 873 /* for waiting until the execution of the kdamond_fn is started */ 874 struct completion kdamond_started; 875 /* for scheme quotas prioritization */ 876 unsigned long *regions_score_histogram; 877 878 /* lists of &struct damon_call_control */ 879 struct list_head call_controls; 880 bool call_controls_obsolete; 881 struct mutex call_controls_lock; 882 883 struct damos_walk_control *walk_control; 884 bool walk_control_obsolete; 885 struct mutex walk_control_lock; 886 887 /* 888 * indicate if this may be corrupted. Currentonly this is set only for 889 * damon_commit_ctx() failure. 890 */ 891 bool maybe_corrupted; 892 893 /* Working thread of the given DAMON context */ 894 struct task_struct *kdamond; 895 /* Protects @kdamond field access */ 896 struct mutex kdamond_lock; 897 898 /* public: */ 899 struct damon_operations ops; 900 struct list_head probes; 901 unsigned long addr_unit; 902 unsigned long min_region_sz; 903 bool pause; 904 905 struct list_head adaptive_targets; 906 struct list_head schemes; 907 908 /* Per-ctx PRNG state for damon_rand(); kdamond is the sole consumer. */ 909 struct rnd_state rnd_state; 910 }; 911 912 /* Get a random number in [@l, @r) using @ctx's lockless PRNG. */ 913 static inline unsigned long damon_rand(struct damon_ctx *ctx, 914 unsigned long l, unsigned long r) 915 { 916 unsigned long span = r - l; 917 u64 rnd; 918 919 if (span <= U32_MAX) { 920 rnd = prandom_u32_state(&ctx->rnd_state); 921 return l + (unsigned long)((rnd * span) >> 32); 922 } 923 rnd = ((u64)prandom_u32_state(&ctx->rnd_state) << 32) | 924 prandom_u32_state(&ctx->rnd_state); 925 return l + mul_u64_u64_shr(rnd, span, 64); 926 } 927 928 static inline struct damon_region *damon_next_region(struct damon_region *r) 929 { 930 return container_of(r->list.next, struct damon_region, list); 931 } 932 933 static inline struct damon_region *damon_prev_region(struct damon_region *r) 934 { 935 return container_of(r->list.prev, struct damon_region, list); 936 } 937 938 static inline struct damon_region *damon_last_region(struct damon_target *t) 939 { 940 return list_last_entry(&t->regions_list, struct damon_region, list); 941 } 942 943 static inline struct damon_region *damon_first_region(struct damon_target *t) 944 { 945 return list_first_entry(&t->regions_list, struct damon_region, list); 946 } 947 948 static inline unsigned long damon_sz_region(struct damon_region *r) 949 { 950 return r->ar.end - r->ar.start; 951 } 952 953 #define damon_for_each_filter(f, p) \ 954 list_for_each_entry(f, &(p)->filters, list) 955 956 #define damon_for_each_filter_safe(f, next, p) \ 957 list_for_each_entry_safe(f, next, &(p)->filters, list) 958 959 #define damon_for_each_probe(p, ctx) \ 960 list_for_each_entry(p, &(ctx)->probes, list) 961 962 #define damon_for_each_probe_safe(p, next, ctx) \ 963 list_for_each_entry_safe(p, next, &(ctx)->probes, list) 964 965 #define damon_for_each_region(r, t) \ 966 list_for_each_entry(r, &(t)->regions_list, list) 967 968 #define damon_for_each_region_from(r, t) \ 969 list_for_each_entry_from(r, &(t)->regions_list, list) 970 971 #define damon_for_each_region_safe(r, next, t) \ 972 list_for_each_entry_safe(r, next, &(t)->regions_list, list) 973 974 #define damon_for_each_target(t, ctx) \ 975 list_for_each_entry(t, &(ctx)->adaptive_targets, list) 976 977 #define damon_for_each_target_safe(t, next, ctx) \ 978 list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list) 979 980 #define damon_for_each_scheme(s, ctx) \ 981 list_for_each_entry(s, &(ctx)->schemes, list) 982 983 #define damon_for_each_scheme_safe(s, next, ctx) \ 984 list_for_each_entry_safe(s, next, &(ctx)->schemes, list) 985 986 #define damos_for_each_quota_goal(goal, quota) \ 987 list_for_each_entry(goal, &(quota)->goals, list) 988 989 #define damos_for_each_quota_goal_safe(goal, next, quota) \ 990 list_for_each_entry_safe(goal, next, &(quota)->goals, list) 991 992 #define damos_for_each_core_filter(f, scheme) \ 993 list_for_each_entry(f, &(scheme)->core_filters, list) 994 995 #define damos_for_each_core_filter_safe(f, next, scheme) \ 996 list_for_each_entry_safe(f, next, &(scheme)->core_filters, list) 997 998 #define damos_for_each_ops_filter(f, scheme) \ 999 list_for_each_entry(f, &(scheme)->ops_filters, list) 1000 1001 #define damos_for_each_ops_filter_safe(f, next, scheme) \ 1002 list_for_each_entry_safe(f, next, &(scheme)->ops_filters, list) 1003 1004 #ifdef CONFIG_DAMON 1005 1006 struct damon_filter *damon_new_filter(enum damon_filter_type type, 1007 bool matching, bool allow); 1008 void damon_add_filter(struct damon_probe *probe, struct damon_filter *f); 1009 void damon_destroy_filter(struct damon_filter *f); 1010 1011 struct damon_probe *damon_new_probe(void); 1012 void damon_add_probe(struct damon_ctx *ctx, struct damon_probe *probe); 1013 1014 struct damon_region *damon_new_region(unsigned long start, unsigned long end); 1015 1016 int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, 1017 unsigned int nr_ranges, unsigned long min_region_sz); 1018 void damon_update_region_access_rate(struct damon_region *r, bool accessed, 1019 struct damon_attrs *attrs); 1020 1021 struct damos_filter *damos_new_filter(enum damos_filter_type type, 1022 bool matching, bool allow); 1023 void damos_add_filter(struct damos *s, struct damos_filter *f); 1024 bool damos_filter_for_ops(enum damos_filter_type type); 1025 void damos_destroy_filter(struct damos_filter *f); 1026 1027 struct damos_quota_goal *damos_new_quota_goal( 1028 enum damos_quota_goal_metric metric, 1029 unsigned long target_value); 1030 void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g); 1031 void damos_destroy_quota_goal(struct damos_quota_goal *goal); 1032 1033 struct damos *damon_new_scheme(struct damos_access_pattern *pattern, 1034 enum damos_action action, 1035 unsigned long apply_interval_us, 1036 struct damos_quota *quota, 1037 struct damos_watermarks *wmarks, 1038 int target_nid); 1039 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); 1040 void damon_destroy_scheme(struct damos *s); 1041 int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src); 1042 1043 struct damon_target *damon_new_target(void); 1044 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); 1045 bool damon_targets_empty(struct damon_ctx *ctx); 1046 void damon_free_target(struct damon_target *t); 1047 void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx); 1048 unsigned int damon_nr_regions(struct damon_target *t); 1049 1050 struct damon_ctx *damon_new_ctx(void); 1051 void damon_destroy_ctx(struct damon_ctx *ctx); 1052 int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs); 1053 void damon_set_schemes(struct damon_ctx *ctx, 1054 struct damos **schemes, ssize_t nr_schemes); 1055 int damon_commit_ctx(struct damon_ctx *old_ctx, struct damon_ctx *new_ctx); 1056 int damon_nr_running_ctxs(void); 1057 bool damon_is_registered_ops(enum damon_ops_id id); 1058 int damon_register_ops(struct damon_operations *ops); 1059 int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id); 1060 1061 static inline bool damon_target_has_pid(const struct damon_ctx *ctx) 1062 { 1063 return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR; 1064 } 1065 1066 static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs) 1067 { 1068 /* {aggr,sample}_interval are unsigned long, hence could overflow */ 1069 return min(attrs->aggr_interval / attrs->sample_interval, 1070 (unsigned long)UINT_MAX); 1071 } 1072 1073 1074 bool damon_initialized(void); 1075 int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); 1076 int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); 1077 bool damon_is_running(struct damon_ctx *ctx); 1078 int damon_kdamond_pid(struct damon_ctx *ctx); 1079 1080 int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); 1081 int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); 1082 1083 int damon_set_region_system_rams_default(struct damon_target *t, 1084 unsigned long *start, unsigned long *end, 1085 unsigned long addr_unit, 1086 unsigned long min_region_sz); 1087 1088 #endif /* CONFIG_DAMON */ 1089 1090 #endif /* _DAMON_H */ 1091