// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2023-2024 Intel Corporation (Maarten Lankhorst <dev@lankhorst.se>)
 * Copyright 2024 Red Hat (Maxime Ripard <mripard@kernel.org>)
 * Partially based on the rdma and misc controllers, which bear the following copyrights:
 *
 * Copyright 2020 Google LLC
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 */

#include <linux/cgroup.h>
#include <linux/cgroup_dmem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/page_counter.h>
#include <linux/parser.h>
#include <linux/refcount.h>
#include <linux/rculist.h>
#include <linux/slab.h>

struct dmem_cgroup_region {
	/**
	 * @ref: References keeping the region alive.
	 * Keeps the region reference alive after a successful RCU lookup.
	 */
	struct kref ref;

	/** @rcu: RCU head for freeing */
	struct rcu_head rcu;

	/**
	 * @region_node: Linked into &dmem_cgroup_regions list.
	 * Protected by RCU and the global spinlock.
	 */
	struct list_head region_node;

	/**
	 * @pools: List of pools linked to this region.
	 * Protected by the global spinlock only.
	 */
	struct list_head pools;

	/** @size: Size of region, in bytes */
	u64 size;

	/** @name: Name describing the node, set by dmem_cgroup_register_region() */
	char *name;

	/**
	 * @unregistered: Whether the region is unregistered by its caller.
	 * No new pools should be added to the region afterwards.
	 */
	bool unregistered;
};

struct dmemcg_state {
	struct cgroup_subsys_state css;

	struct list_head pools;
};

struct dmem_cgroup_pool_state {
	struct dmem_cgroup_region *region;
	struct dmemcg_state *cs;

	/* css node, RCU protected against region teardown */
	struct list_head css_node;

	/* dev node, no RCU protection required */
	struct list_head region_node;

	struct rcu_head rcu;

	struct page_counter cnt;
	struct dmem_cgroup_pool_state *parent;

	refcount_t ref;
	bool inited;
};

/*
 * Three operations require locking protection:
 * - Registering and unregistering a region to/from the list requires the global lock.
 * - Adding a dmem_cgroup_pool_state to a CSS, and removing it when the CSS is freed.
 * - Adding a dmem_cgroup_pool_state to a region list.
 *
 * Since RCU provides enough protection for the most common operations,
 * more granular locking does not seem worthwhile. Most protection is offered
 * by RCU and the lockless page_counter.
 */
static DEFINE_SPINLOCK(dmemcg_lock);
static LIST_HEAD(dmem_cgroup_regions);

static void dmemcg_free_region(struct kref *ref);
static void dmemcg_pool_free_rcu(struct rcu_head *rcu);

static inline struct dmemcg_state *
css_to_dmemcs(struct cgroup_subsys_state *css)
{
	return container_of(css, struct dmemcg_state, css);
}

static inline struct dmemcg_state *get_current_dmemcs(void)
{
	return css_to_dmemcs(task_get_css(current, dmem_cgrp_id));
}

static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg)
{
	return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL;
}
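
/*
 * Illustrative overview, added for clarity: each (cgroup, region) pair gets
 * its own dmem_cgroup_pool_state. A pool's page_counter is parented to the
 * pool of the same region in the parent cgroup, so charging a leaf cgroup
 * also charges every ancestor. With a hypothetical region "vram0":
 *
 *	root  -> pool(root, vram0)
 *	  A   -> pool(A, vram0),  cnt.parent points at pool(root, vram0)
 *	    B -> pool(B, vram0),  cnt.parent points at pool(A, vram0)
 *
 * Pools are reachable from the cgroup side via @css_node (RCU) and from the
 * region side via @region_node (under dmemcg_lock).
 */
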
static void dmemcg_pool_get(struct dmem_cgroup_pool_state *pool)
{
	refcount_inc(&pool->ref);
}

static bool dmemcg_pool_tryget(struct dmem_cgroup_pool_state *pool)
{
	return refcount_inc_not_zero(&pool->ref);
}

static void dmemcg_pool_put(struct dmem_cgroup_pool_state *pool)
{
	if (!refcount_dec_and_test(&pool->ref))
		return;

	call_rcu(&pool->rcu, dmemcg_pool_free_rcu);
}

static void dmemcg_pool_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_pool_state *pool = container_of(rcu, typeof(*pool), rcu);

	if (pool->parent)
		dmemcg_pool_put(pool->parent);
	kref_put(&pool->region->ref, dmemcg_free_region);
	kfree(pool);
}

static void free_cg_pool(struct dmem_cgroup_pool_state *pool)
{
	list_del(&pool->region_node);
	dmemcg_pool_put(pool);
}

static void
set_resource_min(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_min(&pool->cnt, val);
}

static void
set_resource_low(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_low(&pool->cnt, val);
}

static void
set_resource_max(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_max(&pool->cnt, val);
}

static u64 get_resource_low(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.low) : 0;
}

static u64 get_resource_min(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.min) : 0;
}

static u64 get_resource_max(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.max) : PAGE_COUNTER_MAX;
}

static u64 get_resource_current(struct dmem_cgroup_pool_state *pool)
{
	return pool ? page_counter_read(&pool->cnt) : 0;
}

static void reset_all_resource_limits(struct dmem_cgroup_pool_state *rpool)
{
	set_resource_min(rpool, 0);
	set_resource_low(rpool, 0);
	set_resource_max(rpool, PAGE_COUNTER_MAX);
}

static void dmemcs_offline(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool;

	rcu_read_lock();
	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node)
		reset_all_resource_limits(pool);
	rcu_read_unlock();
}

static void dmemcs_free(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool, *next;

	spin_lock(&dmemcg_lock);
	list_for_each_entry_safe(pool, next, &dmemcs->pools, css_node) {
		/*
		 * The pool is dead and all references are 0,
		 * no need for RCU protection with list_del_rcu or freeing.
		 */
		list_del(&pool->css_node);
		free_cg_pool(pool);
	}
	spin_unlock(&dmemcg_lock);

	kfree(dmemcs);
}

static struct cgroup_subsys_state *
dmemcs_alloc(struct cgroup_subsys_state *parent_css)
{
	struct dmemcg_state *dmemcs = kzalloc(sizeof(*dmemcs), GFP_KERNEL);

	if (!dmemcs)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&dmemcs->pools);
	return &dmemcs->css;
}

static struct dmem_cgroup_pool_state *
find_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool;

	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node, spin_is_locked(&dmemcg_lock))
		if (pool->region == region)
			return pool;

	return NULL;
}

static struct dmem_cgroup_pool_state *pool_parent(struct dmem_cgroup_pool_state *pool)
{
	if (!pool->cnt.parent)
		return NULL;

	return container_of(pool->cnt.parent, typeof(*pool), cnt);
}

static void
dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool,
				 struct dmem_cgroup_pool_state *test_pool)
{
	struct page_counter *climit;
	struct cgroup_subsys_state *css;
	struct dmemcg_state *dmemcg_iter;
	struct dmem_cgroup_pool_state *pool, *found_pool;

	climit = &limit_pool->cnt;

	rcu_read_lock();

	css_for_each_descendant_pre(css, &limit_pool->cs->css) {
		dmemcg_iter = container_of(css, struct dmemcg_state, css);
		found_pool = NULL;

		list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) {
			if (pool->region == limit_pool->region) {
				found_pool = pool;
				break;
			}
		}
		if (!found_pool)
			continue;

		page_counter_calculate_protection(
			climit, &found_pool->cnt, true);

		if (found_pool == test_pool)
			break;
	}
	rcu_read_unlock();
}

/**
 * dmem_cgroup_state_evict_valuable() - Check if we should evict from test_pool
 * @limit_pool: The pool for which we hit limits
 * @test_pool: The pool for which to test
 * @ignore_low: Whether to ignore low watermarks.
 * @ret_hit_low: Pointer to whether it makes sense to consider the low watermark.
 *
 * This function returns true if we can evict from @test_pool, false if not.
 * When returning false and @ignore_low is false, @ret_hit_low may
 * be set to true to indicate this function can be retried with @ignore_low
 * set to true.
 *
 * Return: %true if eviction from @test_pool is allowed, %false otherwise.
 */
bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
				      struct dmem_cgroup_pool_state *test_pool,
				      bool ignore_low, bool *ret_hit_low)
{
	struct dmem_cgroup_pool_state *pool = test_pool;
	struct page_counter *ctest;
	u64 used, min, low;

	/* Can always evict from current pool, despite limits */
	if (limit_pool == test_pool)
		return true;

	if (limit_pool) {
		if (!parent_dmemcs(limit_pool->cs))
			return true;

		for (pool = test_pool; pool && limit_pool != pool; pool = pool_parent(pool))
			{}

		if (!pool)
			return false;
	} else {
		/*
		 * If there is no cgroup limiting memory usage, use the root
		 * cgroup instead for limit calculations.
		 */
		for (limit_pool = test_pool; pool_parent(limit_pool); limit_pool = pool_parent(limit_pool))
			{}
	}

	ctest = &test_pool->cnt;

	dmem_cgroup_calculate_protection(limit_pool, test_pool);

	used = page_counter_read(ctest);
	min = READ_ONCE(ctest->emin);

	if (used <= min)
		return false;

	if (!ignore_low) {
		low = READ_ONCE(ctest->elow);
		if (used > low)
			return true;

		*ret_hit_low = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_state_evict_valuable);
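
/*
 * Illustrative sketch, not part of this file: a driver eviction loop built
 * around dmem_cgroup_state_evict_valuable() would typically run two passes,
 * first honouring low watermarks and then, if nothing could be evicted but a
 * low watermark was hit, retrying with @ignore_low set. The victim iterator
 * and evict() below are hypothetical driver code.
 *
 *	bool ignore_low = false, hit_low = false;
 *
 * retry:
 *	for_each_victim(victim) {
 *		if (!dmem_cgroup_state_evict_valuable(limit_pool, victim->pool,
 *						      ignore_low, &hit_low))
 *			continue;
 *		evict(victim);
 *	}
 *	if (!ignore_low && hit_low) {
 *		ignore_low = true;
 *		goto retry;
 *	}
 *	dmem_cgroup_pool_state_put(limit_pool);
 */
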
static struct dmem_cgroup_pool_state *
alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		  struct dmem_cgroup_pool_state **allocpool)
{
	struct dmemcg_state *parent = parent_dmemcs(dmemcs);
	struct dmem_cgroup_pool_state *pool, *ppool = NULL;

	if (!*allocpool) {
		pool = kzalloc(sizeof(*pool), GFP_NOWAIT);
		if (!pool)
			return ERR_PTR(-ENOMEM);
	} else {
		pool = *allocpool;
		*allocpool = NULL;
	}

	pool->region = region;
	pool->cs = dmemcs;

	if (parent)
		ppool = find_cg_pool_locked(parent, region);

	page_counter_init(&pool->cnt,
			  ppool ? &ppool->cnt : NULL, true);
	reset_all_resource_limits(pool);
	refcount_set(&pool->ref, 1);
	kref_get(&region->ref);
	if (ppool && !pool->parent) {
		pool->parent = ppool;
		dmemcg_pool_get(ppool);
	}

	list_add_tail_rcu(&pool->css_node, &dmemcs->pools);
	list_add_tail(&pool->region_node, &region->pools);

	if (!parent)
		pool->inited = true;
	else
		pool->inited = ppool ? ppool->inited : false;
	return pool;
}

static struct dmem_cgroup_pool_state *
get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		   struct dmem_cgroup_pool_state **allocpool)
{
	struct dmem_cgroup_pool_state *pool, *ppool, *retpool;
	struct dmemcg_state *p, *pp;

	/*
	 * Recursively create pools; a pool may not be fully initialized while
	 * recursing, initialization is done as a separate step.
	 */
	for (p = dmemcs; p; p = parent_dmemcs(p)) {
		pool = find_cg_pool_locked(p, region);
		if (!pool)
			pool = alloc_pool_single(p, region, allocpool);

		if (IS_ERR(pool))
			return pool;

		if (p == dmemcs && pool->inited)
			return pool;

		if (pool->inited)
			break;
	}

	retpool = pool = find_cg_pool_locked(dmemcs, region);
	for (p = dmemcs, pp = parent_dmemcs(dmemcs); pp; p = pp, pp = parent_dmemcs(p)) {
		if (pool->inited)
			break;

		/* ppool was created by the loop above if it didn't exist yet. */
		ppool = find_cg_pool_locked(pp, region);

		/* Fix up parent links, mark as inited. */
		pool->cnt.parent = &ppool->cnt;
		if (ppool && !pool->parent) {
			pool->parent = ppool;
			dmemcg_pool_get(ppool);
		}
		pool->inited = true;

		pool = ppool;
	}

	return retpool;
}
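
/*
 * Note, added for clarity: when e.g. cgroup /A/B/C charges a region for the
 * first time, the first loop in get_cg_pool_locked() above walks from C
 * towards the root, allocating a not-yet-initialized pool wherever one is
 * missing and stopping at the first initialized pool it finds. The second
 * loop walks up from C again, pointing each pool's page_counter and parent
 * pointer at the pool one level up and marking it initialized, stopping at
 * the first ancestor that already was.
 */
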
static void dmemcg_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_region *region = container_of(rcu, typeof(*region), rcu);
	struct dmem_cgroup_pool_state *pool, *next;

	list_for_each_entry_safe(pool, next, &region->pools, region_node)
		free_cg_pool(pool);
	kfree(region->name);
	kfree(region);
}

static void dmemcg_free_region(struct kref *ref)
{
	struct dmem_cgroup_region *cgregion = container_of(ref, typeof(*cgregion), ref);

	call_rcu(&cgregion->rcu, dmemcg_free_rcu);
}

/**
 * dmem_cgroup_unregister_region() - Unregister a previously registered region.
 * @region: The region to unregister.
 *
 * This function undoes dmem_cgroup_register_region().
 */
void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *next;

	if (!region)
		return;

	spin_lock(&dmemcg_lock);

	/* Remove from global region list */
	list_del_rcu(&region->region_node);

	list_for_each_entry_safe(pool, next, &region->pools, region_node) {
		list_del_rcu(&pool->css_node);
		list_del(&pool->region_node);
		dmemcg_pool_put(pool);
	}

	/*
	 * Ensure any RCU based lookups fail. Additionally,
	 * no new pools should be added to the dead region
	 * by get_cg_pool_unlocked().
	 */
	region->unregistered = true;
	spin_unlock(&dmemcg_lock);

	kref_put(&region->ref, dmemcg_free_region);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_unregister_region);

/**
 * dmem_cgroup_register_region() - Register a region for the dmem cgroup.
 * @size: Size of the region to register, in bytes.
 * @fmt: Printf-style format for the region name.
 *
 * This function registers a node in the dmem cgroup with the
 * name given. After calling this function, the region can be
 * used for allocations.
 *
 * Return: %NULL if @size is zero, a valid pointer on success,
 * or an ERR_PTR() on failure.
 */
struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *fmt, ...)
{
	struct dmem_cgroup_region *ret;
	char *region_name;
	va_list ap;

	if (!size)
		return NULL;

	va_start(ap, fmt);
	region_name = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);
	if (!region_name)
		return ERR_PTR(-ENOMEM);

	ret = kzalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret) {
		kfree(region_name);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&ret->pools);
	ret->name = region_name;
	ret->size = size;
	kref_init(&ret->ref);

	spin_lock(&dmemcg_lock);
	list_add_tail_rcu(&ret->region_node, &dmem_cgroup_regions);
	spin_unlock(&dmemcg_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_register_region);
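
/*
 * Illustrative sketch, hypothetical driver code: a driver would typically
 * register one region per memory type at probe time and unregister it on
 * removal. The "drm/%s/vram0" name and the mydev structure are made up for
 * this example.
 *
 *	static int mydev_init_cgroup(struct mydev *mydev)
 *	{
 *		mydev->region =
 *			dmem_cgroup_register_region(mydev->vram_size,
 *						    "drm/%s/vram0",
 *						    dev_name(mydev->dev));
 *		return PTR_ERR_OR_ZERO(mydev->region);
 *	}
 *
 *	static void mydev_fini_cgroup(struct mydev *mydev)
 *	{
 *		dmem_cgroup_unregister_region(mydev->region);
 *	}
 */
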
static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name)
{
	struct dmem_cgroup_region *region;

	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node, spin_is_locked(&dmemcg_lock))
		if (!strcmp(name, region->name) &&
		    kref_get_unless_zero(&region->ref))
			return region;

	return NULL;
}

/**
 * dmem_cgroup_pool_state_put() - Drop a reference to a dmem_cgroup_pool_state
 * @pool: &dmem_cgroup_pool_state
 *
 * Called to drop a reference to the limiting pool returned by
 * dmem_cgroup_try_charge().
 */
void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
{
	if (pool) {
		css_put(&pool->cs->css);
		dmemcg_pool_put(pool);
	}
}
EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put);

static struct dmem_cgroup_pool_state *
get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *allocpool = NULL;

	/* fastpath lookup? */
	rcu_read_lock();
	pool = find_cg_pool_locked(cg, region);
	if (pool && !READ_ONCE(pool->inited))
		pool = NULL;
	if (pool && !dmemcg_pool_tryget(pool))
		pool = NULL;
	rcu_read_unlock();

	while (!pool) {
		spin_lock(&dmemcg_lock);
		if (!region->unregistered)
			pool = get_cg_pool_locked(cg, region, &allocpool);
		else
			pool = ERR_PTR(-ENODEV);
		if (!IS_ERR(pool))
			dmemcg_pool_get(pool);
		spin_unlock(&dmemcg_lock);

		if (pool == ERR_PTR(-ENOMEM)) {
			pool = NULL;
			if (WARN_ON(allocpool))
				continue;

			allocpool = kzalloc(sizeof(*allocpool), GFP_KERNEL);
			if (allocpool) {
				pool = NULL;
				continue;
			}
		}
	}

	kfree(allocpool);
	return pool;
}

/**
 * dmem_cgroup_uncharge() - Uncharge a pool.
 * @pool: Pool to uncharge.
 * @size: Size to uncharge.
 *
 * Undoes the effects of dmem_cgroup_try_charge().
 * Must be called with the pool returned by dmem_cgroup_try_charge()
 * as argument, and the same @size.
 */
void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
{
	if (!pool)
		return;

	page_counter_uncharge(&pool->cnt, size);
	css_put(&pool->cs->css);
	dmemcg_pool_put(pool);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge);

/**
 * dmem_cgroup_try_charge() - Try charging a new allocation to a region.
 * @region: dmem region to charge
 * @size: Size (in bytes) to charge.
 * @ret_pool: On successful allocation, the pool that is charged.
 * @ret_limit_pool: On a failed allocation, the limiting pool.
 *
 * This function charges the @region region for a size of @size bytes.
 *
 * If the function succeeds, @ret_pool is set, which must be passed to
 * dmem_cgroup_uncharge() when undoing the allocation.
 *
 * When this function fails with -EAGAIN and @ret_limit_pool is non-null, it
 * will be set to the pool for which the limit is hit. This can be used for
 * eviction as argument to dmem_cgroup_state_evict_valuable(). This reference
 * must be freed with dmem_cgroup_pool_state_put().
 *
 * Return: 0 on success, -EAGAIN on hitting a limit, or a negative errno on failure.
 */
int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
			   struct dmem_cgroup_pool_state **ret_pool,
			   struct dmem_cgroup_pool_state **ret_limit_pool)
{
	struct dmemcg_state *cg;
	struct dmem_cgroup_pool_state *pool;
	struct page_counter *fail;
	int ret;

	*ret_pool = NULL;
	if (ret_limit_pool)
		*ret_limit_pool = NULL;

	/*
	 * Hold on to the css, as the cgroup can be removed but resource
	 * accounting happens on the css.
	 */
	cg = get_current_dmemcs();

	pool = get_cg_pool_unlocked(cg, region);
	if (IS_ERR(pool)) {
		ret = PTR_ERR(pool);
		goto err;
	}

	if (!page_counter_try_charge(&pool->cnt, size, &fail)) {
		if (ret_limit_pool) {
			*ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt);
			css_get(&(*ret_limit_pool)->cs->css);
			dmemcg_pool_get(*ret_limit_pool);
		}
		dmemcg_pool_put(pool);
		ret = -EAGAIN;
		goto err;
	}

	/* On success, the reference from get_current_dmemcs() is transferred to *ret_pool */
	*ret_pool = pool;
	return 0;

err:
	css_put(&cg->css);
	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_try_charge);
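
/*
 * Illustrative sketch, hypothetical driver code: charging an allocation and
 * handling the -EAGAIN case. mydev_evict_to_fit() stands in for driver
 * specific eviction built on dmem_cgroup_state_evict_valuable().
 *
 *	struct dmem_cgroup_pool_state *pool, *limit_pool;
 *	int ret;
 *
 *	ret = dmem_cgroup_try_charge(mydev->region, size, &pool, &limit_pool);
 *	if (ret == -EAGAIN) {
 *		ret = mydev_evict_to_fit(mydev, limit_pool, size);
 *		dmem_cgroup_pool_state_put(limit_pool);
 *		if (!ret)
 *			ret = dmem_cgroup_try_charge(mydev->region, size,
 *						     &pool, NULL);
 *	}
 *	if (ret)
 *		return ret;
 *
 * and once the buffer is freed again:
 *
 *	dmem_cgroup_uncharge(pool, size);
 */
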
static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v)
{
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		seq_puts(sf, region->name);
		seq_printf(sf, " %llu\n", region->size);
	}
	rcu_read_unlock();
	return 0;
}

static int dmemcg_parse_limit(char *options, struct dmem_cgroup_region *region,
			      u64 *new_limit)
{
	char *end;

	if (!strcmp(options, "max")) {
		*new_limit = PAGE_COUNTER_MAX;
		return 0;
	}

	*new_limit = memparse(options, &end);
	if (*end != '\0')
		return -EINVAL;

	return 0;
}

static ssize_t dmemcg_limit_write(struct kernfs_open_file *of,
				  char *buf, size_t nbytes, loff_t off,
				  void (*apply)(struct dmem_cgroup_pool_state *, u64))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(of_css(of));
	int err = 0;

	while (buf && !err) {
		struct dmem_cgroup_pool_state *pool = NULL;
		char *options, *region_name;
		struct dmem_cgroup_region *region;
		u64 new_limit;

		options = buf;
		buf = strchr(buf, '\n');
		if (buf)
			*buf++ = '\0';

		options = strstrip(options);

		/* eat empty lines */
		if (!options[0])
			continue;

		region_name = strsep(&options, " \t");
		if (!region_name[0])
			continue;

		if (!options || !*options)
			return -EINVAL;

		rcu_read_lock();
		region = dmemcg_get_region_by_name(region_name);
		rcu_read_unlock();

		if (!region)
			return -EINVAL;

		err = dmemcg_parse_limit(options, region, &new_limit);
		if (err < 0)
			goto out_put;

		pool = get_cg_pool_unlocked(dmemcs, region);
		if (IS_ERR(pool)) {
			err = PTR_ERR(pool);
			goto out_put;
		}

		/* And commit */
		apply(pool, new_limit);
		dmemcg_pool_put(pool);

out_put:
		kref_put(&region->ref, dmemcg_free_region);
	}

	return err ?: nbytes;
}

static int dmemcg_limit_show(struct seq_file *sf, void *v,
			     u64 (*fn)(struct dmem_cgroup_pool_state *))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(seq_css(sf));
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		struct dmem_cgroup_pool_state *pool = find_cg_pool_locked(dmemcs, region);
		u64 val;

		seq_puts(sf, region->name);

		val = fn(pool);
		if (val < PAGE_COUNTER_MAX)
			seq_printf(sf, " %lld\n", val);
		else
			seq_puts(sf, " max\n");
	}
	rcu_read_unlock();

	return 0;
}

static int dmem_cgroup_region_current_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_current);
}
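
/*
 * Note on the min/low/max write handlers below, added for clarity: a write
 * may contain one "<region> <value>" pair per line, where <region> is the
 * name passed to dmem_cgroup_register_region() and <value> is either "max"
 * or a size accepted by memparse() (so K/M/G suffixes work). The region name
 * in this example is hypothetical:
 *
 *	drm/0000:03:00.0/vram0 max
 *	drm/0000:03:00.0/vram0 1073741824
 */
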
static int dmem_cgroup_region_min_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_min);
}

static ssize_t dmem_cgroup_region_min_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_min);
}

static int dmem_cgroup_region_low_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_low);
}

static ssize_t dmem_cgroup_region_low_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_low);
}

static int dmem_cgroup_region_max_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_max);
}

static ssize_t dmem_cgroup_region_max_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_max);
}

static struct cftype files[] = {
	{
		.name = "capacity",
		.seq_show = dmem_cgroup_region_capacity_show,
		.flags = CFTYPE_ONLY_ON_ROOT,
	},
	{
		.name = "current",
		.seq_show = dmem_cgroup_region_current_show,
	},
	{
		.name = "min",
		.write = dmem_cgroup_region_min_write,
		.seq_show = dmem_cgroup_region_min_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "low",
		.write = dmem_cgroup_region_low_write,
		.seq_show = dmem_cgroup_region_low_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "max",
		.write = dmem_cgroup_region_max_write,
		.seq_show = dmem_cgroup_region_max_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{ } /* Zero entry terminates. */
};

struct cgroup_subsys dmem_cgrp_subsys = {
	.css_alloc = dmemcs_alloc,
	.css_free = dmemcs_free,
	.css_offline = dmemcs_offline,
	.legacy_cftypes = files,
	.dfl_cftypes = files,
};