// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2023-2024 Intel Corporation (Maarten Lankhorst <dev@lankhorst.se>)
 * Copyright 2024 Red Hat (Maxime Ripard <mripard@kernel.org>)
 * Partially based on the rdma and misc controllers, which bear the following copyrights:
 *
 * Copyright 2020 Google LLC
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 */

#include <linux/cgroup.h>
#include <linux/cgroup_dmem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/page_counter.h>
#include <linux/parser.h>
#include <linux/refcount.h>
#include <linux/rculist.h>
#include <linux/slab.h>

/*
 * A region represents one device-memory resource (e.g. a VRAM heap)
 * registered by a driver. Each cgroup that charges against it gets a
 * dmem_cgroup_pool_state linked into @pools.
 */
struct dmem_cgroup_region {
	/**
	 * @ref: References keeping the region alive.
	 * Keeps the region reference alive after a successful RCU lookup.
	 */
	struct kref ref;

	/** @rcu: RCU head for freeing */
	struct rcu_head rcu;

	/**
	 * @region_node: Linked into &dmem_cgroup_regions list.
	 * Protected by RCU and global spinlock.
	 */
	struct list_head region_node;

	/**
	 * @pools: List of pools linked to this region.
	 * Protected by global spinlock only
	 */
	struct list_head pools;

	/** @size: Size of region, in bytes */
	u64 size;

	/** @name: Name describing the node, set by dmem_cgroup_register_region */
	char *name;

	/**
	 * @unregistered: Whether the region is unregistered by its caller.
	 * No new pools should be added to the region afterwards.
	 */
	bool unregistered;
};

/* Per-cgroup controller state; @pools holds one pool per charged region. */
struct dmemcg_state {
	struct cgroup_subsys_state css;

	struct list_head pools;
};

/*
 * One (cgroup, region) accounting node. The page_counter parent chain
 * mirrors the cgroup hierarchy so charges propagate upwards.
 */
struct dmem_cgroup_pool_state {
	struct dmem_cgroup_region *region;
	struct dmemcg_state *cs;

	/* css node, RCU protected against region teardown */
	struct list_head css_node;

	/* dev node, no RCU protection required */
	struct list_head region_node;

	struct rcu_head rcu;

	struct page_counter cnt;
	struct dmem_cgroup_pool_state *parent;

	refcount_t ref;
	bool inited;
};

/*
 * 3 operations require locking protection:
 * - Registering and unregistering region to/from list, requires global lock.
 * - Adding a dmem_cgroup_pool_state to a CSS, removing when CSS is freed.
 * - Adding a dmem_cgroup_pool_state to a region list.
 *
 * Since for the most common operations RCU provides enough protection, I
 * do not think more granular locking makes sense. Most protection is offered
 * by RCU and the lockless operating page_counter.
 */
static DEFINE_SPINLOCK(dmemcg_lock);
static LIST_HEAD(dmem_cgroup_regions);

static void dmemcg_free_region(struct kref *ref);
static void dmemcg_pool_free_rcu(struct rcu_head *rcu);

static inline struct dmemcg_state *
css_to_dmemcs(struct cgroup_subsys_state *css)
{
	return container_of(css, struct dmemcg_state, css);
}

/* Returns the current task's dmem state with a css reference held. */
static inline struct dmemcg_state *get_current_dmemcs(void)
{
	return css_to_dmemcs(task_get_css(current, dmem_cgrp_id));
}

static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg)
{
	return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL;
}

static void dmemcg_pool_get(struct dmem_cgroup_pool_state *pool)
{
	refcount_inc(&pool->ref);
}

/* For RCU lookups: fails if the pool's last reference is already gone. */
static bool dmemcg_pool_tryget(struct dmem_cgroup_pool_state *pool)
{
	return refcount_inc_not_zero(&pool->ref);
}

/* Drop a pool reference; the actual free is deferred past a grace period. */
static void dmemcg_pool_put(struct dmem_cgroup_pool_state *pool)
{
	if (!refcount_dec_and_test(&pool->ref))
		return;

	call_rcu(&pool->rcu, dmemcg_pool_free_rcu);
}

/*
 * RCU callback: releases the references the pool held on its parent pool
 * and its region, then frees the pool itself.
 */
static void dmemcg_pool_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_pool_state *pool = container_of(rcu, typeof(*pool), rcu);

	if (pool->parent)
		dmemcg_pool_put(pool->parent);
	kref_put(&pool->region->ref, dmemcg_free_region);
	kfree(pool);
}

/* Unlink from the region list and drop the list's reference. */
static void free_cg_pool(struct dmem_cgroup_pool_state *pool)
{
	list_del(&pool->region_node);
	dmemcg_pool_put(pool);
}

static void
set_resource_min(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_min(&pool->cnt, val);
}

static void
set_resource_low(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_low(&pool->cnt, val);
}

static void
set_resource_max(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_max(&pool->cnt, val);
}

/* The get_resource_* helpers accept a NULL pool and return the default. */
static u64 get_resource_low(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.low) : 0;
}

static u64 get_resource_min(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.min) : 0;
}

static u64 get_resource_max(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.max) : PAGE_COUNTER_MAX;
}

static u64 get_resource_current(struct dmem_cgroup_pool_state *pool)
{
	return pool ? page_counter_read(&pool->cnt) : 0;
}

/* Reset min/low/max to their "no limit" defaults. */
static void reset_all_resource_limits(struct dmem_cgroup_pool_state *rpool)
{
	set_resource_min(rpool, 0);
	set_resource_low(rpool, 0);
	set_resource_max(rpool, PAGE_COUNTER_MAX);
}

/*
 * css_offline: drop all limits so charges held by still-live objects in a
 * removed cgroup no longer constrain anything. Pools stay until css_free.
 */
static void dmemcs_offline(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool;

	rcu_read_lock();
	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node)
		reset_all_resource_limits(pool);
	rcu_read_unlock();
}

/* css_free: tear down all pools still attached to this cgroup's state. */
static void dmemcs_free(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool, *next;

	spin_lock(&dmemcg_lock);
	list_for_each_entry_safe(pool, next, &dmemcs->pools, css_node) {
		/*
		 * The pool is dead and all references are 0,
		 * no need for RCU protection with list_del_rcu or freeing.
		 */
		list_del(&pool->css_node);
		free_cg_pool(pool);
	}
	spin_unlock(&dmemcg_lock);

	kfree(dmemcs);
}

/* css_alloc: pools are created lazily on first charge, not here. */
static struct cgroup_subsys_state *
dmemcs_alloc(struct cgroup_subsys_state *parent_css)
{
	struct dmemcg_state *dmemcs = kzalloc_obj(*dmemcs);
	if (!dmemcs)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&dmemcs->pools);
	return &dmemcs->css;
}

/*
 * Look up the pool for @region in @dmemcs. Caller must hold either
 * dmemcg_lock or the RCU read lock (hence the lockdep condition below).
 */
static struct dmem_cgroup_pool_state *
find_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool;

	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node, spin_is_locked(&dmemcg_lock))
		if (pool->region == region)
			return pool;

	return NULL;
}

/* Parent pool via the page_counter parent chain; NULL at the root. */
static struct dmem_cgroup_pool_state *pool_parent(struct dmem_cgroup_pool_state *pool)
{
	if (!pool->cnt.parent)
		return NULL;

	return container_of(pool->cnt.parent, typeof(*pool), cnt);
}

/*
 * Propagate min/low protection from @limit_pool down the hierarchy
 * (pre-order walk of descendant cgroups), populating emin/elow, stopping
 * once @test_pool has been covered.
 */
static void
dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool,
				 struct dmem_cgroup_pool_state *test_pool)
{
	struct page_counter *climit;
	struct cgroup_subsys_state *css;
	struct dmemcg_state *dmemcg_iter;
	struct dmem_cgroup_pool_state *pool, *found_pool;

	climit = &limit_pool->cnt;

	rcu_read_lock();

	css_for_each_descendant_pre(css, &limit_pool->cs->css) {
		dmemcg_iter = container_of(css, struct dmemcg_state, css);
		found_pool = NULL;

		/* Find this descendant's pool for the same region, if any. */
		list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) {
			if (pool->region == limit_pool->region) {
				found_pool = pool;
				break;
			}
		}
		if (!found_pool)
			continue;

		page_counter_calculate_protection(
			climit, &found_pool->cnt, true);

		if (found_pool == test_pool)
			break;
	}
	rcu_read_unlock();
}

/**
 * dmem_cgroup_state_evict_valuable() - Check if we should evict from test_pool
 * @limit_pool: The pool for which we hit limits
 * @test_pool: The pool for which to test
 * @ignore_low: Whether we have to respect low watermarks.
 * @ret_hit_low: Pointer to whether it makes sense to consider low watermark.
 *
 * This function returns true if we can evict from @test_pool, false if not.
 * When returning false and @ignore_low is false, @ret_hit_low may
 * be set to true to indicate this function can be retried with @ignore_low
 * set to true.
 *
 * Return: bool
 */
bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
				      struct dmem_cgroup_pool_state *test_pool,
				      bool ignore_low, bool *ret_hit_low)
{
	struct dmem_cgroup_pool_state *pool = test_pool;
	struct page_counter *ctest;
	u64 used, min, low;

	/* Can always evict from current pool, despite limits */
	if (limit_pool == test_pool)
		return true;

	if (limit_pool) {
		if (!parent_dmemcs(limit_pool->cs))
			return true;

		/* Only pools in @limit_pool's subtree are eviction candidates. */
		for (pool = test_pool; pool && limit_pool != pool; pool = pool_parent(pool))
			{}

		if (!pool)
			return false;
	} else {
		/*
		 * If there is no cgroup limiting memory usage, use the root
		 * cgroup instead for limit calculations.
		 */
		for (limit_pool = test_pool; pool_parent(limit_pool); limit_pool = pool_parent(limit_pool))
			{}
	}

	ctest = &test_pool->cnt;

	dmem_cgroup_calculate_protection(limit_pool, test_pool);

	used = page_counter_read(ctest);
	min = READ_ONCE(ctest->emin);

	/* Never evict below the effective min protection. */
	if (used <= min)
		return false;

	if (!ignore_low) {
		low = READ_ONCE(ctest->elow);
		if (used > low)
			return true;

		/* Below low watermark: caller may retry with ignore_low. */
		*ret_hit_low = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_state_evict_valuable);

/*
 * Allocate and link a single pool for (dmemcs, region). Called under
 * dmemcg_lock, hence GFP_NOWAIT; *allocpool, if set, is a preallocated
 * pool handed in by get_cg_pool_unlocked() and is consumed here.
 * The pool is only marked inited if its parent chain already is.
 */
static struct dmem_cgroup_pool_state *
alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		  struct dmem_cgroup_pool_state **allocpool)
{
	struct dmemcg_state *parent = parent_dmemcs(dmemcs);
	struct dmem_cgroup_pool_state *pool, *ppool = NULL;

	if (!*allocpool) {
		pool = kzalloc_obj(*pool, GFP_NOWAIT);
		if (!pool)
			return ERR_PTR(-ENOMEM);
	} else {
		pool = *allocpool;
		*allocpool = NULL;
	}

	pool->region = region;
	pool->cs = dmemcs;

	if (parent)
		ppool = find_cg_pool_locked(parent, region);

	page_counter_init(&pool->cnt,
			  ppool ? &ppool->cnt : NULL, true);
	reset_all_resource_limits(pool);
	refcount_set(&pool->ref, 1);
	/* The pool pins its region for its whole lifetime. */
	kref_get(&region->ref);
	if (ppool && !pool->parent) {
		pool->parent = ppool;
		dmemcg_pool_get(ppool);
	}

	list_add_tail_rcu(&pool->css_node, &dmemcs->pools);
	list_add_tail(&pool->region_node, &region->pools);

	if (!parent)
		pool->inited = true;
	else
		pool->inited = ppool ? ppool->inited : false;
	return pool;
}

/*
 * Get (creating if needed) the pool for (dmemcs, region), under dmemcg_lock.
 * First pass walks up the hierarchy creating missing pools; second pass
 * walks up again fixing parent links and marking pools inited top-down.
 */
static struct dmem_cgroup_pool_state *
get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		   struct dmem_cgroup_pool_state **allocpool)
{
	struct dmem_cgroup_pool_state *pool, *ppool, *retpool;
	struct dmemcg_state *p, *pp;

	/*
	 * Recursively create pool, we may not initialize yet on
	 * recursion, this is done as a separate step.
	 */
	for (p = dmemcs; p; p = parent_dmemcs(p)) {
		pool = find_cg_pool_locked(p, region);
		if (!pool)
			pool = alloc_pool_single(p, region, allocpool);

		if (IS_ERR(pool))
			return pool;

		if (p == dmemcs && pool->inited)
			return pool;

		if (pool->inited)
			break;
	}

	retpool = pool = find_cg_pool_locked(dmemcs, region);
	for (p = dmemcs, pp = parent_dmemcs(dmemcs); pp; p = pp, pp = parent_dmemcs(p)) {
		if (pool->inited)
			break;

		/* ppool was created if it didn't exist by above loop. */
		ppool = find_cg_pool_locked(pp, region);

		/*
		 * Fix up parent links, mark as inited.
		 *
		 * NOTE(review): ppool is dereferenced here before the NULL
		 * check just below. If the comment above guarantees ppool is
		 * never NULL, the check is redundant; otherwise this is a
		 * NULL dereference. Confirm which invariant is intended.
		 */
		pool->cnt.parent = &ppool->cnt;
		if (ppool && !pool->parent) {
			pool->parent = ppool;
			dmemcg_pool_get(ppool);
		}
		pool->inited = true;

		pool = ppool;
	}

	return retpool;
}

/*
 * RCU callback for region teardown: free any pools still on the region
 * list (possible when the region was unregistered), then the region.
 */
static void dmemcg_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_region *region = container_of(rcu, typeof(*region), rcu);
	struct dmem_cgroup_pool_state *pool, *next;

	list_for_each_entry_safe(pool, next, &region->pools, region_node)
		free_cg_pool(pool);
	kfree(region->name);
	kfree(region);
}

static void dmemcg_free_region(struct kref *ref)
{
	struct dmem_cgroup_region *cgregion = container_of(ref, typeof(*cgregion), ref);

	call_rcu(&cgregion->rcu, dmemcg_free_rcu);
}

/**
 * dmem_cgroup_unregister_region() - Unregister a previously registered region.
 * @region: The region to unregister.
 *
 * This function undoes dmem_cgroup_register_region.
 */
void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *next;

	if (!region)
		return;

	spin_lock(&dmemcg_lock);

	/* Remove from global region list */
	list_del_rcu(&region->region_node);

	list_for_each_entry_safe(pool, next, &region->pools, region_node) {
		list_del_rcu(&pool->css_node);
		list_del(&pool->region_node);
		dmemcg_pool_put(pool);
	}

	/*
	 * Ensure any RCU based lookups fail. Additionally,
	 * no new pools should be added to the dead region
	 * by get_cg_pool_unlocked.
	 */
	region->unregistered = true;
	spin_unlock(&dmemcg_lock);

	/* Drop the initial reference from dmem_cgroup_register_region(). */
	kref_put(&region->ref, dmemcg_free_region);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_unregister_region);

/**
 * dmem_cgroup_register_region() - Register a region for dev cgroup.
 * @size: Size of region to register, in bytes.
 * @fmt: Region parameters to register
 *
 * This function registers a node in the dmem cgroup with the
 * name given. After calling this function, the region can be
 * used for allocations.
 *
 * Return: NULL or a struct on success, PTR_ERR on failure.
 */
struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *fmt, ...)
{
	struct dmem_cgroup_region *ret;
	char *region_name;
	va_list ap;

	/* A zero-sized region is not an error, just nothing to account. */
	if (!size)
		return NULL;

	va_start(ap, fmt);
	region_name = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);
	if (!region_name)
		return ERR_PTR(-ENOMEM);

	ret = kzalloc_obj(*ret);
	if (!ret) {
		kfree(region_name);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&ret->pools);
	ret->name = region_name;
	ret->size = size;
	kref_init(&ret->ref);

	spin_lock(&dmemcg_lock);
	list_add_tail_rcu(&ret->region_node, &dmem_cgroup_regions);
	spin_unlock(&dmemcg_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_register_region);

/*
 * Name lookup in the global region list; returns the region with a
 * reference held, or NULL. Caller must hold RCU or dmemcg_lock.
 */
static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name)
{
	struct dmem_cgroup_region *region;

	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node, spin_is_locked(&dmemcg_lock))
		if (!strcmp(name, region->name) &&
		    kref_get_unless_zero(&region->ref))
			return region;

	return NULL;
}

/**
 * dmem_cgroup_pool_state_put() - Drop a reference to a dmem_cgroup_pool_state
 * @pool: &dmem_cgroup_pool_state
 *
 * Called to drop a reference to the limiting pool returned by
 * dmem_cgroup_try_charge().
 */
void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
{
	if (pool) {
		css_put(&pool->cs->css);
		dmemcg_pool_put(pool);
	}
}
EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put);

/*
 * Lockless fast path pool lookup with slow path creation under the lock.
 * Returns the pool with a reference held, or an ERR_PTR.
 */
static struct dmem_cgroup_pool_state *
get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *allocpool = NULL;

	/* fastpath lookup? */
	rcu_read_lock();
	pool = find_cg_pool_locked(cg, region);
	if (pool && !READ_ONCE(pool->inited))
		pool = NULL;
	if (pool && !dmemcg_pool_tryget(pool))
		pool = NULL;
	rcu_read_unlock();

	while (!pool) {
		spin_lock(&dmemcg_lock);
		if (!region->unregistered)
			pool = get_cg_pool_locked(cg, region, &allocpool);
		else
			pool = ERR_PTR(-ENODEV);
		if (!IS_ERR(pool))
			dmemcg_pool_get(pool);
		spin_unlock(&dmemcg_lock);

		/*
		 * GFP_NOWAIT allocation under the lock failed: preallocate
		 * with GFP_KERNEL outside the lock and retry.
		 *
		 * NOTE(review): if this kzalloc_obj also fails, pool stays
		 * NULL and the loop retries the GFP_NOWAIT path indefinitely
		 * — presumably a deliberate wait-for-memory retry, but
		 * confirm this cannot busy-loop forever under OOM.
		 */
		if (pool == ERR_PTR(-ENOMEM)) {
			pool = NULL;
			if (WARN_ON(allocpool))
				continue;

			allocpool = kzalloc_obj(*allocpool);
			if (allocpool) {
				pool = NULL;
				continue;
			}
		}
	}

	/* Unused preallocation (another CPU raced us to create the pool). */
	kfree(allocpool);
	return pool;
}

/**
 * dmem_cgroup_uncharge() - Uncharge a pool.
 * @pool: Pool to uncharge.
 * @size: Size to uncharge.
 *
 * Undoes the effects of dmem_cgroup_try_charge.
 * Must be called with the returned pool as argument,
 * and same @size.
 */
void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
{
	if (!pool)
		return;

	page_counter_uncharge(&pool->cnt, size);
	css_put(&pool->cs->css);
	dmemcg_pool_put(pool);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge);

/**
 * dmem_cgroup_try_charge() - Try charging a new allocation to a region.
 * @region: dmem region to charge
 * @size: Size (in bytes) to charge.
 * @ret_pool: On successful allocation, the pool that is charged.
 * @ret_limit_pool: On a failed allocation, the limiting pool.
 *
 * This function charges the @region region for a size of @size bytes.
 *
 * If the function succeeds, @ret_pool is set, which must be passed to
 * dmem_cgroup_uncharge() when undoing the allocation.
 *
 * When this function fails with -EAGAIN and @ret_limit_pool is non-null, it
 * will be set to the pool for which the limit is hit. This can be used for
 * eviction as argument to dmem_cgroup_state_evict_valuable(). This reference
 * must be freed with @dmem_cgroup_pool_state_put().
 *
 * Return: 0 on success, -EAGAIN on hitting a limit, or a negative errno on failure.
 */
int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
			   struct dmem_cgroup_pool_state **ret_pool,
			   struct dmem_cgroup_pool_state **ret_limit_pool)
{
	struct dmemcg_state *cg;
	struct dmem_cgroup_pool_state *pool;
	struct page_counter *fail;
	int ret;

	*ret_pool = NULL;
	if (ret_limit_pool)
		*ret_limit_pool = NULL;

	/*
	 * hold on to css, as cgroup can be removed but resource
	 * accounting happens on css.
	 */
	cg = get_current_dmemcs();

	pool = get_cg_pool_unlocked(cg, region);
	if (IS_ERR(pool)) {
		ret = PTR_ERR(pool);
		goto err;
	}

	if (!page_counter_try_charge(&pool->cnt, size, &fail)) {
		/* Report which ancestor's limit blocked the charge. */
		if (ret_limit_pool) {
			*ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt);
			css_get(&(*ret_limit_pool)->cs->css);
			dmemcg_pool_get(*ret_limit_pool);
		}
		dmemcg_pool_put(pool);
		ret = -EAGAIN;
		goto err;
	}

	/* On success, reference from get_current_dmemcs is transferred to *ret_pool */
	*ret_pool = pool;
	return 0;

err:
	css_put(&cg->css);
	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_try_charge);

/* dmem.capacity (root only): one "<name> <size>" line per region. */
static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v)
{
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		seq_puts(sf, region->name);
		seq_printf(sf, " %llu\n", region->size);
	}
	rcu_read_unlock();
	return 0;
}

/* Parse a limit value: "max" or a memparse() size (K/M/G suffixes). */
static int dmemcg_parse_limit(char *options, u64 *new_limit)
{
	char *end;

	if (!strcmp(options, "max")) {
		*new_limit = PAGE_COUNTER_MAX;
		return 0;
	}

	*new_limit = memparse(options, &end);
	if (*end != '\0')
		return -EINVAL;

	return 0;
}

/*
 * Common writer for dmem.{min,low,max}: each input line is
 * "<region-name> <limit>"; @apply commits the parsed limit to the pool.
 */
static ssize_t dmemcg_limit_write(struct kernfs_open_file *of,
				  char *buf, size_t nbytes, loff_t off,
				  void (*apply)(struct dmem_cgroup_pool_state *, u64))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(of_css(of));
	int err = 0;

	while (buf && !err) {
		struct dmem_cgroup_pool_state *pool = NULL;
		char *options, *region_name;
		struct dmem_cgroup_region *region;
		u64 new_limit;

		/* Split off one line. */
		options = buf;
		buf = strchr(buf, '\n');
		if (buf)
			*buf++ = '\0';

		options = strstrip(options);

		/* eat empty lines */
		if (!options[0])
			continue;

		region_name = strsep(&options, " \t");
		if (!region_name[0])
			continue;

		if (!options || !*options)
			return -EINVAL;

		rcu_read_lock();
		region = dmemcg_get_region_by_name(region_name);
		rcu_read_unlock();

		if (!region)
			return -EINVAL;

		err = dmemcg_parse_limit(options, &new_limit);
		if (err < 0)
			goto out_put;

		pool = get_cg_pool_unlocked(dmemcs, region);
		if (IS_ERR(pool)) {
			err = PTR_ERR(pool);
			goto out_put;
		}

		/* And commit */
		apply(pool, new_limit);
		dmemcg_pool_put(pool);

out_put:
		kref_put(&region->ref, dmemcg_free_region);
	}


	return err ?: nbytes;
}

/*
 * Common reader for dmem.{current,min,low,max}: one line per region,
 * value fetched via @fn (which tolerates a NULL pool).
 */
static int dmemcg_limit_show(struct seq_file *sf, void *v,
			     u64 (*fn)(struct dmem_cgroup_pool_state *))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(seq_css(sf));
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		struct dmem_cgroup_pool_state *pool = find_cg_pool_locked(dmemcs, region);
		u64 val;

		seq_puts(sf, region->name);

		val = fn(pool);
		if (val < PAGE_COUNTER_MAX)
			seq_printf(sf, " %lld\n", val);
		else
			seq_puts(sf, " max\n");
	}
	rcu_read_unlock();

	return 0;
}

static int dmem_cgroup_region_current_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_current);
}

static int dmem_cgroup_region_min_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_min);
}

static ssize_t dmem_cgroup_region_min_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_min);
}

static int dmem_cgroup_region_low_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_low);
}

static ssize_t dmem_cgroup_region_low_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_low);
}

static int dmem_cgroup_region_max_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_max);
}

static ssize_t dmem_cgroup_region_max_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_max);
}

/* Control files exposed by the dmem controller. */
static struct cftype files[] = {
	{
		.name = "capacity",
		.seq_show = dmem_cgroup_region_capacity_show,
		.flags = CFTYPE_ONLY_ON_ROOT,
	},
	{
		.name = "current",
		.seq_show = dmem_cgroup_region_current_show,
	},
	{
		.name = "min",
		.write = dmem_cgroup_region_min_write,
		.seq_show = dmem_cgroup_region_min_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "low",
		.write = dmem_cgroup_region_low_write,
		.seq_show = dmem_cgroup_region_low_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "max",
		.write = dmem_cgroup_region_max_write,
		.seq_show = dmem_cgroup_region_max_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{ } /* Zero entry terminates. */
};

struct cgroup_subsys dmem_cgrp_subsys = {
	.css_alloc	= dmemcs_alloc,
	.css_free	= dmemcs_free,
	.css_offline	= dmemcs_offline,
	.legacy_cftypes	= files,
	.dfl_cftypes	= files,
};