// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2023-2024 Intel Corporation (Maarten Lankhorst <dev@lankhorst.se>)
 * Copyright 2024 Red Hat (Maxime Ripard <mripard@kernel.org>)
 * Partially based on the rdma and misc controllers, which bear the following copyrights:
 *
 * Copyright 2020 Google LLC
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 */

#include <linux/cgroup.h>
#include <linux/cgroup_dmem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/page_counter.h>
#include <linux/parser.h>
#include <linux/slab.h>

struct dmem_cgroup_region {
	/**
	 * @ref: References keeping the region alive.
	 * Keeps the region reference alive after a successful RCU lookup.
	 */
	struct kref ref;

	/** @rcu: RCU head for freeing */
	struct rcu_head rcu;

	/**
	 * @region_node: Linked into &dmem_cgroup_regions list.
	 * Protected by RCU and global spinlock.
	 */
	struct list_head region_node;

	/**
	 * @pools: List of pools linked to this region.
	 * Protected by the global spinlock only.
	 */
	struct list_head pools;

	/** @size: Size of region, in bytes */
	u64 size;

	/** @name: Name describing the node, set by dmem_cgroup_register_region */
	char *name;

	/**
	 * @unregistered: Whether the region is unregistered by its caller.
	 * No new pools should be added to the region afterwards.
	 */
	bool unregistered;
};

struct dmemcg_state {
	struct cgroup_subsys_state css;

	struct list_head pools;
};

struct dmem_cgroup_pool_state {
	struct dmem_cgroup_region *region;
	struct dmemcg_state *cs;

	/* css node, RCU protected against region teardown */
	struct list_head css_node;

	/* dev node, no RCU protection required */
	struct list_head region_node;

	struct rcu_head rcu;

	struct page_counter cnt;

	bool inited;
};

/*
 * Three operations require locking protection:
 * - Registering and unregistering a region to/from the list, requires the global lock.
 * - Adding a dmem_cgroup_pool_state to a CSS, removing it when the CSS is freed.
 * - Adding a dmem_cgroup_pool_state to a region list.
 *
 * Since RCU provides enough protection for the most common operations, more
 * granular locking does not seem worthwhile. Most protection is offered by
 * RCU and the lockless page_counter.
 */
static DEFINE_SPINLOCK(dmemcg_lock);
static LIST_HEAD(dmem_cgroup_regions);

static inline struct dmemcg_state *
css_to_dmemcs(struct cgroup_subsys_state *css)
{
	return container_of(css, struct dmemcg_state, css);
}

static inline struct dmemcg_state *get_current_dmemcs(void)
{
	return css_to_dmemcs(task_get_css(current, dmem_cgrp_id));
}

static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg)
{
	return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL;
}

static void free_cg_pool(struct dmem_cgroup_pool_state *pool)
{
	list_del(&pool->region_node);
	kfree(pool);
}

static void
set_resource_min(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_min(&pool->cnt, val);
}

static void
set_resource_low(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_low(&pool->cnt, val);
}

static void
set_resource_max(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_max(&pool->cnt, val);
}

static u64 get_resource_low(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.low) : 0;
}

static u64 get_resource_min(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.min) : 0;
}

static u64 get_resource_max(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.max) : PAGE_COUNTER_MAX;
}

static u64 get_resource_current(struct dmem_cgroup_pool_state *pool)
{
	return pool ? page_counter_read(&pool->cnt) : 0;
}

static void reset_all_resource_limits(struct dmem_cgroup_pool_state *rpool)
{
	set_resource_min(rpool, 0);
	set_resource_low(rpool, 0);
	set_resource_max(rpool, PAGE_COUNTER_MAX);
}

static void dmemcs_offline(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool;

	rcu_read_lock();
	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node)
		reset_all_resource_limits(pool);
	rcu_read_unlock();
}

static void dmemcs_free(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool, *next;

	spin_lock(&dmemcg_lock);
	list_for_each_entry_safe(pool, next, &dmemcs->pools, css_node) {
		/*
		 * The pool is dead and all references are 0,
		 * no need for RCU protection with list_del_rcu or freeing.
		 */
		list_del(&pool->css_node);
		free_cg_pool(pool);
	}
	spin_unlock(&dmemcg_lock);

	kfree(dmemcs);
}

static struct cgroup_subsys_state *
dmemcs_alloc(struct cgroup_subsys_state *parent_css)
{
	struct dmemcg_state *dmemcs = kzalloc(sizeof(*dmemcs), GFP_KERNEL);
	if (!dmemcs)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&dmemcs->pools);
	return &dmemcs->css;
}

static struct dmem_cgroup_pool_state *
find_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool;

	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node, spin_is_locked(&dmemcg_lock))
		if (pool->region == region)
			return pool;

	return NULL;
}

static struct dmem_cgroup_pool_state *pool_parent(struct dmem_cgroup_pool_state *pool)
{
	if (!pool->cnt.parent)
		return NULL;

	return container_of(pool->cnt.parent, typeof(*pool), cnt);
}

static void
dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool,
				 struct dmem_cgroup_pool_state *test_pool)
{
	struct page_counter *climit;
	struct cgroup_subsys_state *css;
	struct dmemcg_state *dmemcg_iter;
	struct dmem_cgroup_pool_state *pool, *found_pool;

	climit = &limit_pool->cnt;

	rcu_read_lock();

	css_for_each_descendant_pre(css, &limit_pool->cs->css) {
		dmemcg_iter = container_of(css, struct dmemcg_state, css);
		found_pool = NULL;

		list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) {
			if (pool->region == limit_pool->region) {
				found_pool = pool;
				break;
			}
		}
		if (!found_pool)
			continue;

		page_counter_calculate_protection(
			climit, &found_pool->cnt, true);

		if (found_pool == test_pool)
			break;
	}
	rcu_read_unlock();
}

/**
 * dmem_cgroup_state_evict_valuable() - Check if we should evict from test_pool
 * @limit_pool: The pool for which we hit limits
 * @test_pool: The pool for which to test
 * @ignore_low: Whether to ignore the low watermark
 * @ret_hit_low: Pointer to a bool indicating whether retrying with @ignore_low set makes sense
 *
 * This function returns true if we can evict from @test_pool, false if not.
 * When returning false and @ignore_low is false, @ret_hit_low may
 * be set to true to indicate this function can be retried with @ignore_low
 * set to true.
 *
 * Return: %true if eviction from @test_pool is allowed, %false otherwise.
 */
bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
				      struct dmem_cgroup_pool_state *test_pool,
				      bool ignore_low, bool *ret_hit_low)
{
	struct dmem_cgroup_pool_state *pool = test_pool;
	struct page_counter *ctest;
	u64 used, min, low;

	/* Can always evict from current pool, despite limits */
	if (limit_pool == test_pool)
		return true;

	if (limit_pool) {
		if (!parent_dmemcs(limit_pool->cs))
			return true;

		for (pool = test_pool; pool && limit_pool != pool; pool = pool_parent(pool))
			{}

		if (!pool)
			return false;
	} else {
		/*
		 * If there is no cgroup limiting memory usage, use the root
		 * cgroup instead for limit calculations.
		 */
		for (limit_pool = test_pool; pool_parent(limit_pool); limit_pool = pool_parent(limit_pool))
			{}
	}

	ctest = &test_pool->cnt;

	dmem_cgroup_calculate_protection(limit_pool, test_pool);

	used = page_counter_read(ctest);
	min = READ_ONCE(ctest->emin);

	if (used <= min)
		return false;

	if (!ignore_low) {
		low = READ_ONCE(ctest->elow);
		if (used > low)
			return true;

		*ret_hit_low = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_state_evict_valuable);
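
/*
 * Example (illustrative sketch, not compiled here): how a hypothetical driver
 * helper, here called my_driver_evict_for(), might walk its LRU and use
 * dmem_cgroup_state_evict_valuable() with the limiting pool returned by a
 * failed dmem_cgroup_try_charge(). drv, its lru list, obj->pool and
 * my_evict_object() are all assumed driver-side constructs.
 *
 *	static void my_driver_evict_for(struct my_driver *drv,
 *					struct dmem_cgroup_pool_state *limit_pool)
 *	{
 *		struct my_obj *obj, *tmp;
 *		bool ignore_low = false, hit_low = false;
 *
 *	retry:
 *		list_for_each_entry_safe(obj, tmp, &drv->lru, lru_node) {
 *			if (!dmem_cgroup_state_evict_valuable(limit_pool,
 *							      obj->pool,
 *							      ignore_low,
 *							      &hit_low))
 *				continue;
 *			my_evict_object(obj);
 *		}
 *
 *	Retry once while ignoring the low watermark if that was the only thing
 *	preventing eviction:
 *
 *		if (!ignore_low && hit_low) {
 *			ignore_low = true;
 *			hit_low = false;
 *			goto retry;
 *		}
 *	}
 */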

static struct dmem_cgroup_pool_state *
alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		  struct dmem_cgroup_pool_state **allocpool)
{
	struct dmemcg_state *parent = parent_dmemcs(dmemcs);
	struct dmem_cgroup_pool_state *pool, *ppool = NULL;

	if (!*allocpool) {
		pool = kzalloc(sizeof(*pool), GFP_NOWAIT);
		if (!pool)
			return ERR_PTR(-ENOMEM);
	} else {
		pool = *allocpool;
		*allocpool = NULL;
	}

	pool->region = region;
	pool->cs = dmemcs;

	if (parent)
		ppool = find_cg_pool_locked(parent, region);

	page_counter_init(&pool->cnt,
			  ppool ? &ppool->cnt : NULL, true);
	reset_all_resource_limits(pool);

	list_add_tail_rcu(&pool->css_node, &dmemcs->pools);
	list_add_tail(&pool->region_node, &region->pools);

	if (!parent)
		pool->inited = true;
	else
		pool->inited = ppool ? ppool->inited : false;
	return pool;
}

static struct dmem_cgroup_pool_state *
get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		   struct dmem_cgroup_pool_state **allocpool)
{
	struct dmem_cgroup_pool_state *pool, *ppool, *retpool;
	struct dmemcg_state *p, *pp;

	/*
	 * Recursively create the pools. They may not be fully initialized yet
	 * at this point; initialization is done as a separate step below.
	 */
	for (p = dmemcs; p; p = parent_dmemcs(p)) {
		pool = find_cg_pool_locked(p, region);
		if (!pool)
			pool = alloc_pool_single(p, region, allocpool);

		if (IS_ERR(pool))
			return pool;

		if (p == dmemcs && pool->inited)
			return pool;

		if (pool->inited)
			break;
	}

	retpool = pool = find_cg_pool_locked(dmemcs, region);
	for (p = dmemcs, pp = parent_dmemcs(dmemcs); pp; p = pp, pp = parent_dmemcs(p)) {
		if (pool->inited)
			break;

		/* ppool was created if it didn't exist by above loop. */
		ppool = find_cg_pool_locked(pp, region);

		/* Fix up parent links, mark as inited. */
		pool->cnt.parent = &ppool->cnt;
		pool->inited = true;

		pool = ppool;
	}

	return retpool;
}

static void dmemcg_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_region *region = container_of(rcu, typeof(*region), rcu);
	struct dmem_cgroup_pool_state *pool, *next;

	list_for_each_entry_safe(pool, next, &region->pools, region_node)
		free_cg_pool(pool);
	kfree(region->name);
	kfree(region);
}

static void dmemcg_free_region(struct kref *ref)
{
	struct dmem_cgroup_region *cgregion = container_of(ref, typeof(*cgregion), ref);

	call_rcu(&cgregion->rcu, dmemcg_free_rcu);
}

/**
 * dmem_cgroup_unregister_region() - Unregister a previously registered region.
 * @region: The region to unregister.
 *
 * This function undoes dmem_cgroup_register_region.
 */
void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
{
	struct list_head *entry;

	if (!region)
		return;

	spin_lock(&dmemcg_lock);

	/* Remove from global region list */
	list_del_rcu(&region->region_node);

	list_for_each_rcu(entry, &region->pools) {
		struct dmem_cgroup_pool_state *pool =
			container_of(entry, typeof(*pool), region_node);

		list_del_rcu(&pool->css_node);
	}

	/*
	 * Ensure any RCU based lookups fail. Additionally,
	 * no new pools should be added to the dead region
	 * by get_cg_pool_unlocked.
	 */
	region->unregistered = true;
	spin_unlock(&dmemcg_lock);

	kref_put(&region->ref, dmemcg_free_region);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_unregister_region);

/**
 * dmem_cgroup_register_region() - Register a region with the dmem cgroup.
 * @size: Size of the region to register, in bytes.
 * @fmt: Printf-style format string used to build the region name.
 *
 * This function registers a region in the dmem cgroup with the
 * given name. After calling this function, the region can be
 * used for allocations.
 *
 * Return: %NULL if @size is zero, a pointer to the registered region on
 * success, or an ERR_PTR() on failure.
 */
struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *fmt, ...)
{
	struct dmem_cgroup_region *ret;
	char *region_name;
	va_list ap;

	if (!size)
		return NULL;

	va_start(ap, fmt);
	region_name = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);
	if (!region_name)
		return ERR_PTR(-ENOMEM);

	ret = kzalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret) {
		kfree(region_name);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&ret->pools);
	ret->name = region_name;
	ret->size = size;
	kref_init(&ret->ref);

	spin_lock(&dmemcg_lock);
	list_add_tail_rcu(&ret->region_node, &dmem_cgroup_regions);
	spin_unlock(&dmemcg_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_register_region);
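
/*
 * Example (illustrative sketch, not compiled here): a driver would typically
 * register one region per memory resource at probe time and unregister it on
 * removal. The device, name format and vram_size below are hypothetical.
 *
 *	struct dmem_cgroup_region *region;
 *
 *	region = dmem_cgroup_register_region(vram_size, "drm/%s/vram0",
 *					     dev_name(dev));
 *	if (IS_ERR(region))
 *		return PTR_ERR(region);
 *
 *	A NULL return means vram_size was zero and no region was created.
 *	On driver removal:
 *
 *	dmem_cgroup_unregister_region(region);
 */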

static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name)
{
	struct dmem_cgroup_region *region;

	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node, spin_is_locked(&dmemcg_lock))
		if (!strcmp(name, region->name) &&
		    kref_get_unless_zero(&region->ref))
			return region;

	return NULL;
}

/**
 * dmem_cgroup_pool_state_put() - Drop a reference to a dmem_cgroup_pool_state
 * @pool: &dmem_cgroup_pool_state
 *
 * Called to drop a reference to the limiting pool returned by
 * dmem_cgroup_try_charge().
 */
void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
{
	if (pool)
		css_put(&pool->cs->css);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put);

static struct dmem_cgroup_pool_state *
get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *allocpool = NULL;

	/* fastpath lookup? */
	rcu_read_lock();
	pool = find_cg_pool_locked(cg, region);
	if (pool && !READ_ONCE(pool->inited))
		pool = NULL;
	rcu_read_unlock();

	while (!pool) {
		spin_lock(&dmemcg_lock);
		if (!region->unregistered)
			pool = get_cg_pool_locked(cg, region, &allocpool);
		else
			pool = ERR_PTR(-ENODEV);
		spin_unlock(&dmemcg_lock);

		if (pool == ERR_PTR(-ENOMEM)) {
			pool = NULL;
			if (WARN_ON(allocpool))
				continue;

			allocpool = kzalloc(sizeof(*allocpool), GFP_KERNEL);
			if (allocpool) {
				pool = NULL;
				continue;
			}
		}
	}

	kfree(allocpool);
	return pool;
}

/**
 * dmem_cgroup_uncharge() - Uncharge a pool.
 * @pool: Pool to uncharge.
 * @size: Size to uncharge.
 *
 * Undoes the effects of dmem_cgroup_try_charge.
 * Must be called with the pool returned by dmem_cgroup_try_charge,
 * and the same @size that was charged.
 */
void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
{
	if (!pool)
		return;

	page_counter_uncharge(&pool->cnt, size);
	css_put(&pool->cs->css);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge);

/**
 * dmem_cgroup_try_charge() - Try charging a new allocation to a region.
 * @region: dmem region to charge
 * @size: Size (in bytes) to charge.
 * @ret_pool: On successful charge, the pool that was charged.
 * @ret_limit_pool: On a failed charge, the limiting pool.
 *
 * This function charges the @region region for a size of @size bytes.
 *
 * If the function succeeds, @ret_pool is set, which must be passed to
 * dmem_cgroup_uncharge() when undoing the allocation.
 *
 * When this function fails with -EAGAIN and @ret_limit_pool is non-null, it
 * will be set to the pool for which the limit is hit. This can be used for
 * eviction as argument to dmem_cgroup_state_evict_valuable(). This reference
 * must be freed with dmem_cgroup_pool_state_put().
 *
 * Return: 0 on success, -EAGAIN on hitting a limit, or a negative errno on failure.
 */
int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
			   struct dmem_cgroup_pool_state **ret_pool,
			   struct dmem_cgroup_pool_state **ret_limit_pool)
{
	struct dmemcg_state *cg;
	struct dmem_cgroup_pool_state *pool;
	struct page_counter *fail;
	int ret;

	*ret_pool = NULL;
	if (ret_limit_pool)
		*ret_limit_pool = NULL;

	/*
	 * Hold on to the css, as the cgroup can be removed but resource
	 * accounting happens on the css.
	 */
	cg = get_current_dmemcs();

	pool = get_cg_pool_unlocked(cg, region);
	if (IS_ERR(pool)) {
		ret = PTR_ERR(pool);
		goto err;
	}

	if (!page_counter_try_charge(&pool->cnt, size, &fail)) {
		if (ret_limit_pool) {
			*ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt);
			css_get(&(*ret_limit_pool)->cs->css);
		}
		ret = -EAGAIN;
		goto err;
	}

	/* On success, reference from get_current_dmemcs is transferred to *ret_pool */
	*ret_pool = pool;
	return 0;

err:
	css_put(&cg->css);
	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_try_charge);
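
/*
 * Example (illustrative sketch, not compiled here): a typical driver
 * allocation path charges before committing the allocation and uncharges on
 * free. The obj structure and region variable are hypothetical; the region is
 * assumed to come from dmem_cgroup_register_region().
 *
 *	struct dmem_cgroup_pool_state *pool, *limit_pool = NULL;
 *	int ret;
 *
 *	ret = dmem_cgroup_try_charge(region, obj->size, &pool, &limit_pool);
 *	if (ret == -EAGAIN) {
 *		my_driver_evict_for(drv, limit_pool);
 *		dmem_cgroup_pool_state_put(limit_pool);
 *		goto retry_or_fail;
 *	} else if (ret) {
 *		return ret;
 *	}
 *	obj->pool = pool;
 *
 *	On free, undo the charge with the same pool and size:
 *
 *	dmem_cgroup_uncharge(obj->pool, obj->size);
 *
 *	my_driver_evict_for() and the retry policy are driver specific; see the
 *	sketch next to dmem_cgroup_state_evict_valuable() above.
 */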

static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v)
{
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		seq_puts(sf, region->name);
		seq_printf(sf, " %llu\n", region->size);
	}
	rcu_read_unlock();
	return 0;
}

static int dmemcg_parse_limit(char *options, struct dmem_cgroup_region *region,
			      u64 *new_limit)
{
	char *end;

	if (!strcmp(options, "max")) {
		*new_limit = PAGE_COUNTER_MAX;
		return 0;
	}

	*new_limit = memparse(options, &end);
	if (*end != '\0')
		return -EINVAL;

	return 0;
}

static ssize_t dmemcg_limit_write(struct kernfs_open_file *of,
				  char *buf, size_t nbytes, loff_t off,
				  void (*apply)(struct dmem_cgroup_pool_state *, u64))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(of_css(of));
	int err = 0;

	while (buf && !err) {
		struct dmem_cgroup_pool_state *pool = NULL;
		char *options, *region_name;
		struct dmem_cgroup_region *region;
		u64 new_limit;

		options = buf;
		buf = strchr(buf, '\n');
		if (buf)
			*buf++ = '\0';

		options = strstrip(options);

		/* eat empty lines */
		if (!options[0])
			continue;

		region_name = strsep(&options, " \t");
		if (!region_name[0])
			continue;

		rcu_read_lock();
		region = dmemcg_get_region_by_name(region_name);
		rcu_read_unlock();

		if (!region)
			return -EINVAL;

		err = dmemcg_parse_limit(options, region, &new_limit);
		if (err < 0)
			goto out_put;

		pool = get_cg_pool_unlocked(dmemcs, region);
		if (IS_ERR(pool)) {
			err = PTR_ERR(pool);
			goto out_put;
		}

		/* And commit */
		apply(pool, new_limit);

out_put:
		kref_put(&region->ref, dmemcg_free_region);
	}

	return err ?: nbytes;
}

static int dmemcg_limit_show(struct seq_file *sf, void *v,
			     u64 (*fn)(struct dmem_cgroup_pool_state *))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(seq_css(sf));
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		struct dmem_cgroup_pool_state *pool = find_cg_pool_locked(dmemcs, region);
		u64 val;

		seq_puts(sf, region->name);

		val = fn(pool);
		if (val < PAGE_COUNTER_MAX)
			seq_printf(sf, " %lld\n", val);
		else
			seq_puts(sf, " max\n");
	}
	rcu_read_unlock();

	return 0;
}

static int dmem_cgroup_region_current_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_current);
}

static int dmem_cgroup_region_min_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_min);
}

static ssize_t dmem_cgroup_region_min_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_min);
}

static int dmem_cgroup_region_low_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_low);
}

static ssize_t dmem_cgroup_region_low_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_low);
}

static int dmem_cgroup_region_max_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_max);
}

static ssize_t dmem_cgroup_region_max_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_max);
}

static struct cftype files[] = {
	{
		.name = "capacity",
		.seq_show = dmem_cgroup_region_capacity_show,
		.flags = CFTYPE_ONLY_ON_ROOT,
	},
	{
		.name = "current",
		.seq_show = dmem_cgroup_region_current_show,
	},
	{
		.name = "min",
		.write = dmem_cgroup_region_min_write,
		.seq_show = dmem_cgroup_region_min_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "low",
		.write = dmem_cgroup_region_low_write,
		.seq_show = dmem_cgroup_region_low_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "max",
		.write = dmem_cgroup_region_max_write,
		.seq_show = dmem_cgroup_region_max_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{ } /* Zero entry terminates. */
};

struct cgroup_subsys dmem_cgrp_subsys = {
	.css_alloc = dmemcs_alloc,
	.css_free = dmemcs_free,
	.css_offline = dmemcs_offline,
	.legacy_cftypes = files,
	.dfl_cftypes = files,
};