// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2023-2024 Intel Corporation (Maarten Lankhorst <dev@lankhorst.se>)
 * Copyright 2024 Red Hat (Maxime Ripard <mripard@kernel.org>)
 * Partially based on the rdma and misc controllers, which bear the following copyrights:
 *
 * Copyright 2020 Google LLC
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 */

#include <linux/cgroup.h>
#include <linux/cgroup_dmem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/page_counter.h>
#include <linux/parser.h>
#include <linux/slab.h>

struct dmem_cgroup_region {
	/**
	 * @ref: References keeping the region alive.
	 * Keeps the region reference alive after a successful RCU lookup.
	 */
	struct kref ref;

	/** @rcu: RCU head for freeing */
	struct rcu_head rcu;

	/**
	 * @region_node: Linked into &dmem_cgroup_regions list.
	 * Protected by RCU and global spinlock.
	 */
	struct list_head region_node;

	/**
	 * @pools: List of pools linked to this region.
	 * Protected by global spinlock only
	 */
	struct list_head pools;

	/** @size: Size of region, in bytes */
	u64 size;

	/** @name: Name describing the node, set by dmem_cgroup_register_region */
	char *name;

	/**
	 * @unregistered: Whether the region is unregistered by its caller.
	 * No new pools should be added to the region afterwards.
	 */
	bool unregistered;
};

/* Per-cgroup controller state; embeds the css and tracks this cgroup's pools. */
struct dmemcg_state {
	struct cgroup_subsys_state css;

	struct list_head pools;
};

/* One (cgroup, region) accounting pool; charges are tracked in @cnt. */
struct dmem_cgroup_pool_state {
	struct dmem_cgroup_region *region;
	struct dmemcg_state *cs;

	/* css node, RCU protected against region teardown */
	struct list_head css_node;

	/* dev node, no RCU protection required */
	struct list_head region_node;

	struct rcu_head rcu;

	struct page_counter cnt;

	/* Set once parent links in @cnt are fixed up; see get_cg_pool_locked() */
	bool inited;
};

/*
 * 3 operations require locking protection:
 * - Registering and unregistering region to/from list, requires global lock.
 * - Adding a dmem_cgroup_pool_state to a CSS, removing when CSS is freed.
 * - Adding a dmem_cgroup_pool_state to a region list.
 *
 * Since for the most common operations RCU provides enough protection, I
 * do not think more granular locking makes sense. Most protection is offered
 * by RCU and the lockless operation of the page_counter.
 */
static DEFINE_SPINLOCK(dmemcg_lock);
static LIST_HEAD(dmem_cgroup_regions);

static inline struct dmemcg_state *
css_to_dmemcs(struct cgroup_subsys_state *css)
{
	return container_of(css, struct dmemcg_state, css);
}

/* Returns the dmem state of the current task's cgroup; acquires a css ref. */
static inline struct dmemcg_state *get_current_dmemcs(void)
{
	return css_to_dmemcs(task_get_css(current, dmem_cgrp_id));
}

static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg)
{
	return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL;
}

/* Unlink @pool from its region's pool list and free it. */
static void free_cg_pool(struct dmem_cgroup_pool_state *pool)
{
	list_del(&pool->region_node);
	kfree(pool);
}

static void
set_resource_min(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_min(&pool->cnt, val);
}

static void
set_resource_low(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_low(&pool->cnt, val);
}

static void
set_resource_max(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_max(&pool->cnt, val);
}

static u64 get_resource_low(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.low) : 0;
}

static u64 get_resource_min(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.min) : 0;
}

static u64 get_resource_max(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.max) : PAGE_COUNTER_MAX;
}

static u64 get_resource_current(struct dmem_cgroup_pool_state *pool)
{
	return pool ? page_counter_read(&pool->cnt) : 0;
}

/* Reset min/low to 0 and max to PAGE_COUNTER_MAX, i.e. no protection, no limit. */
static void reset_all_resource_limits(struct dmem_cgroup_pool_state *rpool)
{
	set_resource_min(rpool, 0);
	set_resource_low(rpool, 0);
	set_resource_max(rpool, PAGE_COUNTER_MAX);
}

/* css_offline callback: drop all limits so charges can migrate/drain freely. */
static void dmemcs_offline(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool;

	rcu_read_lock();
	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node)
		reset_all_resource_limits(pool);
	rcu_read_unlock();
}

/* css_free callback: tear down all remaining pools, then the state itself. */
static void dmemcs_free(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool, *next;

	spin_lock(&dmemcg_lock);
	list_for_each_entry_safe(pool, next, &dmemcs->pools, css_node) {
		/*
		 * The pool is dead and all references are 0,
		 * no need for RCU protection with list_del_rcu or freeing.
		 */
		list_del(&pool->css_node);
		free_cg_pool(pool);
	}
	spin_unlock(&dmemcg_lock);

	kfree(dmemcs);
}

/* css_alloc callback: allocate per-cgroup dmem state. */
static struct cgroup_subsys_state *
dmemcs_alloc(struct cgroup_subsys_state *parent_css)
{
	struct dmemcg_state *dmemcs = kzalloc(sizeof(*dmemcs), GFP_KERNEL);
	if (!dmemcs)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&dmemcs->pools);
	return &dmemcs->css;
}

/*
 * Look up the pool of @dmemcs for @region, or NULL if none exists.
 * Callers hold either rcu_read_lock() or dmemcg_lock (hence the lockdep
 * condition on the RCU list walk).
 */
static struct dmem_cgroup_pool_state *
find_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool;

	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node, spin_is_locked(&dmemcg_lock))
		if (pool->region == region)
			return pool;

	return NULL;
}

/* Parent pool in the hierarchy, derived from the page_counter parent link. */
static struct dmem_cgroup_pool_state *pool_parent(struct dmem_cgroup_pool_state *pool)
{
	if (!pool->cnt.parent)
		return NULL;

	return container_of(pool->cnt.parent, typeof(*pool), cnt);
}

static void
dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool,
				 struct dmem_cgroup_pool_state *test_pool)
{
	struct page_counter *climit;
	struct cgroup_subsys_state *css, *next_css;
	struct dmemcg_state *dmemcg_iter;
	struct dmem_cgroup_pool_state *pool, *parent_pool;
	bool found_descendant;

	climit = &limit_pool->cnt;

	rcu_read_lock();
	parent_pool = pool = limit_pool;
	css = &limit_pool->cs->css;

	/*
	 * This logic is roughly equivalent to css_foreach_descendant_pre,
	 * except we also track the parent pool to find out which pool we need
	 * to calculate protection values for.
	 *
	 * We can stop the traversal once we find test_pool among the
	 * descendants since we don't really care about any others.
	 */
	while (pool != test_pool) {
		/* Depth-first: descend into the first child if one exists. */
		next_css = css_next_child(NULL, css);
		if (next_css) {
			parent_pool = pool;
		} else {
			/* No child: climb until a sibling is found. */
			while (css != &limit_pool->cs->css) {
				next_css = css_next_child(css, css->parent);
				if (next_css)
					break;
				css = css->parent;
				parent_pool = pool_parent(parent_pool);
			}
			/*
			 * We can only hit this when test_pool is not a
			 * descendant of limit_pool.
			 */
			if (WARN_ON_ONCE(css == &limit_pool->cs->css))
				break;
		}
		css = next_css;

		found_descendant = false;
		dmemcg_iter = container_of(css, struct dmemcg_state, css);

		/* Find the pool of this css that hangs off parent_pool, if any. */
		list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) {
			if (pool_parent(pool) == parent_pool) {
				found_descendant = true;
				break;
			}
		}
		if (!found_descendant)
			continue;

		page_counter_calculate_protection(
			climit, &pool->cnt, true);
	}
	rcu_read_unlock();
}

/**
 * dmem_cgroup_state_evict_valuable() - Check if we should evict from test_pool
 * @limit_pool: The pool for which we hit limits
 * @test_pool: The pool for which to test
 * @ignore_low: Whether we have to respect low watermarks.
 * @ret_hit_low: Pointer to whether it makes sense to consider low watermark.
 *
 * This function returns true if we can evict from @test_pool, false if not.
 * When returning false and @ignore_low is false, @ret_hit_low may
 * be set to true to indicate this function can be retried with @ignore_low
 * set to true.
 *
 * Return: bool
 */
bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
				      struct dmem_cgroup_pool_state *test_pool,
				      bool ignore_low, bool *ret_hit_low)
{
	struct dmem_cgroup_pool_state *pool = test_pool;
	struct page_counter *climit, *ctest;
	u64 used, min, low;

	/* Can always evict from current pool, despite limits */
	if (limit_pool == test_pool)
		return true;

	if (limit_pool) {
		if (!parent_dmemcs(limit_pool->cs))
			return true;

		/* Walk up from test_pool; stop at limit_pool or the root (NULL). */
		for (pool = test_pool; pool && limit_pool != pool; pool = pool_parent(pool))
			{}

		/* test_pool is not a descendant of limit_pool: not evictable here. */
		if (!pool)
			return false;
	} else {
		/*
		 * If there is no cgroup limiting memory usage, use the root
		 * cgroup instead for limit calculations.
		 */
		for (limit_pool = test_pool; pool_parent(limit_pool); limit_pool = pool_parent(limit_pool))
			{}
	}

	climit = &limit_pool->cnt;
	ctest = &test_pool->cnt;

	dmem_cgroup_calculate_protection(limit_pool, test_pool);

	used = page_counter_read(ctest);
	min = READ_ONCE(ctest->emin);

	/* Usage within the effective min protection: never evict. */
	if (used <= min)
		return false;

	if (!ignore_low) {
		low = READ_ONCE(ctest->elow);
		if (used > low)
			return true;

		/* Below effective low: caller may retry with ignore_low = true. */
		*ret_hit_low = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_state_evict_valuable);

/*
 * Allocate and link one pool for (@dmemcs, @region). Runs under dmemcg_lock,
 * hence GFP_NOWAIT; a GFP_KERNEL preallocation may be handed in via
 * @allocpool (consumed if used). The pool is only marked inited when the
 * parent pool already is (or there is no parent).
 */
static struct dmem_cgroup_pool_state *
alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		  struct dmem_cgroup_pool_state **allocpool)
{
	struct dmemcg_state *parent = parent_dmemcs(dmemcs);
	struct dmem_cgroup_pool_state *pool, *ppool = NULL;

	if (!*allocpool) {
		pool = kzalloc(sizeof(*pool), GFP_NOWAIT);
		if (!pool)
			return ERR_PTR(-ENOMEM);
	} else {
		pool = *allocpool;
		*allocpool = NULL;
	}

	pool->region = region;
	pool->cs = dmemcs;

	if (parent)
		ppool = find_cg_pool_locked(parent, region);

	page_counter_init(&pool->cnt,
			  ppool ? &ppool->cnt : NULL, true);
	reset_all_resource_limits(pool);

	list_add_tail_rcu(&pool->css_node, &dmemcs->pools);
	list_add_tail(&pool->region_node, &region->pools);

	if (!parent)
		pool->inited = true;
	else
		pool->inited = ppool ? ppool->inited : false;
	return pool;
}

/*
 * Get (creating if necessary) the pool for (@dmemcs, @region), including all
 * missing ancestor pools. Caller holds dmemcg_lock.
 */
static struct dmem_cgroup_pool_state *
get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		   struct dmem_cgroup_pool_state **allocpool)
{
	struct dmem_cgroup_pool_state *pool, *ppool, *retpool;
	struct dmemcg_state *p, *pp;

	/*
	 * Recursively create pool, we may not initialize yet on
	 * recursion, this is done as a separate step.
	 */
	for (p = dmemcs; p; p = parent_dmemcs(p)) {
		pool = find_cg_pool_locked(p, region);
		if (!pool)
			pool = alloc_pool_single(p, region, allocpool);

		if (IS_ERR(pool))
			return pool;

		if (p == dmemcs && pool->inited)
			return pool;

		if (pool->inited)
			break;
	}

	/* Second pass: fix up parent counter links bottom-up and mark inited. */
	retpool = pool = find_cg_pool_locked(dmemcs, region);
	for (p = dmemcs, pp = parent_dmemcs(dmemcs); pp; p = pp, pp = parent_dmemcs(p)) {
		if (pool->inited)
			break;

		/* ppool was created if it didn't exist by above loop. */
		ppool = find_cg_pool_locked(pp, region);

		/* Fix up parent links, mark as inited. */
		pool->cnt.parent = &ppool->cnt;
		pool->inited = true;

		pool = ppool;
	}

	return retpool;
}

/* RCU callback: free the region's remaining pools, its name, and the region. */
static void dmemcg_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_region *region = container_of(rcu, typeof(*region), rcu);
	struct dmem_cgroup_pool_state *pool, *next;

	list_for_each_entry_safe(pool, next, &region->pools, region_node)
		free_cg_pool(pool);
	kfree(region->name);
	kfree(region);
}

/* kref release: defer the actual freeing past an RCU grace period. */
static void dmemcg_free_region(struct kref *ref)
{
	struct dmem_cgroup_region *cgregion = container_of(ref, typeof(*cgregion), ref);

	call_rcu(&cgregion->rcu, dmemcg_free_rcu);
}

/**
 * dmem_cgroup_unregister_region() - Unregister a previously registered region.
 * @region: The region to unregister.
 *
 * This function undoes dmem_cgroup_register_region.
 */
void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
{
	struct list_head *entry;

	if (!region)
		return;

	spin_lock(&dmemcg_lock);

	/* Remove from global region list */
	list_del_rcu(&region->region_node);

	list_for_each_rcu(entry, &region->pools) {
		struct dmem_cgroup_pool_state *pool =
			container_of(entry, typeof(*pool), region_node);

		list_del_rcu(&pool->css_node);
	}

	/*
	 * Ensure any RCU based lookups fail. Additionally,
	 * no new pools should be added to the dead region
	 * by get_cg_pool_unlocked.
	 */
	region->unregistered = true;
	spin_unlock(&dmemcg_lock);

	kref_put(&region->ref, dmemcg_free_region);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_unregister_region);

/**
 * dmem_cgroup_register_region() - Register a region for dev cgroup.
 * @size: Size of region to register, in bytes.
 * @fmt: Region parameters to register
 *
 * This function registers a node in the dmem cgroup with the
 * name given. After calling this function, the region can be
 * used for allocations.
 *
 * Return: NULL or a struct on success, PTR_ERR on failure.
 */
struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *fmt, ...)
{
	struct dmem_cgroup_region *ret;
	char *region_name;
	va_list ap;

	/* A zero-sized region is a no-op; NULL is handled by the other APIs. */
	if (!size)
		return NULL;

	va_start(ap, fmt);
	region_name = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);
	if (!region_name)
		return ERR_PTR(-ENOMEM);

	ret = kzalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret) {
		kfree(region_name);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&ret->pools);
	ret->name = region_name;
	ret->size = size;
	kref_init(&ret->ref);

	spin_lock(&dmemcg_lock);
	list_add_tail_rcu(&ret->region_node, &dmem_cgroup_regions);
	spin_unlock(&dmemcg_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_register_region);

/*
 * Look up a registered region by name and take a reference; returns NULL if
 * not found or if the region's refcount already dropped to zero.
 * Caller holds rcu_read_lock() or dmemcg_lock.
 */
static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name)
{
	struct dmem_cgroup_region *region;

	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node, spin_is_locked(&dmemcg_lock))
		if (!strcmp(name, region->name) &&
		    kref_get_unless_zero(&region->ref))
			return region;

	return NULL;
}

/**
 * dmem_cgroup_pool_state_put() - Drop a reference to a dmem_cgroup_pool_state
 * @pool: &dmem_cgroup_pool_state
 *
 * Called to drop a reference to the limiting pool returned by
 * dmem_cgroup_try_charge().
 */
void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
{
	if (pool)
		css_put(&pool->cs->css);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put);

/*
 * Get (creating if necessary) the initialized pool for (@cg, @region) without
 * the caller holding any lock. Returns ERR_PTR(-ENODEV) once the region is
 * unregistered; retries allocation with a GFP_KERNEL preallocation if the
 * GFP_NOWAIT path under the spinlock hits -ENOMEM.
 */
static struct dmem_cgroup_pool_state *
get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *allocpool = NULL;

	/* fastpath lookup? */
	rcu_read_lock();
	pool = find_cg_pool_locked(cg, region);
	if (pool && !READ_ONCE(pool->inited))
		pool = NULL;
	rcu_read_unlock();

	while (!pool) {
		spin_lock(&dmemcg_lock);
		if (!region->unregistered)
			pool = get_cg_pool_locked(cg, region, &allocpool);
		else
			pool = ERR_PTR(-ENODEV);
		spin_unlock(&dmemcg_lock);

		if (pool == ERR_PTR(-ENOMEM)) {
			pool = NULL;
			if (WARN_ON(allocpool))
				continue;

			/* Preallocate outside the lock, then retry the locked path. */
			allocpool = kzalloc(sizeof(*allocpool), GFP_KERNEL);
			if (allocpool) {
				pool = NULL;
				continue;
			}
		}
	}

	/* Free the preallocation if the retry ended up not consuming it. */
	kfree(allocpool);
	return pool;
}

/**
 * dmem_cgroup_uncharge() - Uncharge a pool.
 * @pool: Pool to uncharge.
 * @size: Size to uncharge.
 *
 * Undoes the effects of dmem_cgroup_try_charge.
 * Must be called with the returned pool as argument,
 * and same @size.
 */
void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
{
	if (!pool)
		return;

	page_counter_uncharge(&pool->cnt, size);
	css_put(&pool->cs->css);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge);

/**
 * dmem_cgroup_try_charge() - Try charging a new allocation to a region.
 * @region: Region to charge
 * @size: Size (in bytes) to charge.
 * @ret_pool: On successful allocation, the pool that is charged.
 * @ret_limit_pool: On a failed allocation, the limiting pool.
 *
 * This function charges the current pool for @region for a
 * size of @size bytes.
 *
 * If the function succeeds, @ret_pool is set, which must be passed to
 * dmem_cgroup_uncharge() when undoing the allocation.
 *
 * When this function fails with -EAGAIN and @ret_limit_pool is non-null, it
 * will be set to the pool for which the limit is hit. This can be used for
 * eviction as argument to dmem_cgroup_state_evict_valuable(). This reference must be freed
 * with @dmem_cgroup_pool_state_put().
 *
 * Return: 0 on success, -EAGAIN on hitting a limit, or a negative errno on failure.
 */
int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
			   struct dmem_cgroup_pool_state **ret_pool,
			   struct dmem_cgroup_pool_state **ret_limit_pool)
{
	struct dmemcg_state *cg;
	struct dmem_cgroup_pool_state *pool;
	struct page_counter *fail;
	int ret;

	*ret_pool = NULL;
	if (ret_limit_pool)
		*ret_limit_pool = NULL;

	/*
	 * hold on to css, as cgroup can be removed but resource
	 * accounting happens on css.
	 */
	cg = get_current_dmemcs();

	pool = get_cg_pool_unlocked(cg, region);
	if (IS_ERR(pool)) {
		ret = PTR_ERR(pool);
		goto err;
	}

	if (!page_counter_try_charge(&pool->cnt, size, &fail)) {
		if (ret_limit_pool) {
			*ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt);
			css_get(&(*ret_limit_pool)->cs->css);
		}
		ret = -EAGAIN;
		goto err;
	}

	/* On success, reference from get_current_dmemcs is transferred to *ret_pool */
	*ret_pool = pool;
	return 0;

err:
	css_put(&cg->css);
	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_try_charge);

/* Show "name size" for every registered region (dmem.capacity, root only). */
static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v)
{
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		seq_puts(sf, region->name);
		seq_printf(sf, " %llu\n", region->size);
	}
	rcu_read_unlock();
	return 0;
}

/* Parse a limit value: "max" or a memparse()-style size. Returns 0 or -EINVAL. */
static int dmemcg_parse_limit(char *options, struct dmem_cgroup_region *region,
			      u64 *new_limit)
{
	char *end;

	if (!strcmp(options, "max")) {
		*new_limit = PAGE_COUNTER_MAX;
		return 0;
	}

	*new_limit = memparse(options, &end);
	if (*end != '\0')
		return -EINVAL;

	return 0;
}

/*
 * Common writer for dmem.{min,low,max}: each input line is
 * "<region_name> <limit>", applied to this cgroup's pool via @apply.
 */
static ssize_t dmemcg_limit_write(struct kernfs_open_file *of,
				  char *buf, size_t nbytes, loff_t off,
				  void (*apply)(struct dmem_cgroup_pool_state *, u64))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(of_css(of));
	int err = 0;

	while (buf && !err) {
		struct dmem_cgroup_pool_state *pool = NULL;
		char *options, *region_name;
		struct dmem_cgroup_region *region;
		u64 new_limit;

		options = buf;
		buf = strchr(buf, '\n');
		if (buf)
			*buf++ = '\0';

		options = strstrip(options);

		/* eat empty lines */
		if (!options[0])
			continue;

		region_name = strsep(&options, " \t");
		if (!region_name[0])
			continue;

		rcu_read_lock();
		region = dmemcg_get_region_by_name(region_name);
		rcu_read_unlock();

		if (!region)
			return -EINVAL;

		err = dmemcg_parse_limit(options, region, &new_limit);
		if (err < 0)
			goto out_put;

		pool = get_cg_pool_unlocked(dmemcs, region);
		if (IS_ERR(pool)) {
			err = PTR_ERR(pool);
			goto out_put;
		}

		/* And commit */
		apply(pool, new_limit);

out_put:
		kref_put(&region->ref, dmemcg_free_region);
	}


	return err ?: nbytes;
}

/*
 * Common reader for dmem.{current,min,low,max}: one "name value" line per
 * registered region, with PAGE_COUNTER_MAX shown as "max".
 */
static int dmemcg_limit_show(struct seq_file *sf, void *v,
			     u64 (*fn)(struct dmem_cgroup_pool_state *))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(seq_css(sf));
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		struct dmem_cgroup_pool_state *pool = find_cg_pool_locked(dmemcs, region);
		u64 val;

		seq_puts(sf, region->name);

		val = fn(pool);
		if (val < PAGE_COUNTER_MAX)
			seq_printf(sf, " %lld\n", val);
		else
			seq_puts(sf, " max\n");
	}
	rcu_read_unlock();

	return 0;
}

static int dmem_cgroup_region_current_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_current);
}

static int dmem_cgroup_region_min_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_min);
}

static ssize_t dmem_cgroup_region_min_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_min);
}

static int dmem_cgroup_region_low_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_low);
}

static ssize_t dmem_cgroup_region_low_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_low);
}

static int dmem_cgroup_region_max_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_max);
}

static ssize_t dmem_cgroup_region_max_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_max);
}

/* Control files exposed by this controller. */
static struct cftype files[] = {
	{
		.name = "capacity",
		.seq_show = dmem_cgroup_region_capacity_show,
		.flags = CFTYPE_ONLY_ON_ROOT,
	},
	{
		.name = "current",
		.seq_show = dmem_cgroup_region_current_show,
	},
	{
		.name = "min",
		.write = dmem_cgroup_region_min_write,
		.seq_show = dmem_cgroup_region_min_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "low",
		.write = dmem_cgroup_region_low_write,
		.seq_show = dmem_cgroup_region_low_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "max",
		.write = dmem_cgroup_region_max_write,
		.seq_show = dmem_cgroup_region_max_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{ } /* Zero entry terminates. */
};

struct cgroup_subsys dmem_cgrp_subsys = {
	.css_alloc	= dmemcs_alloc,
	.css_free	= dmemcs_free,
	.css_offline	= dmemcs_offline,
	.legacy_cftypes	= files,
	.dfl_cftypes	= files,
};