1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Miscellaneous cgroup controller
4 *
5 * Copyright 2020 Google LLC
6 * Author: Vipin Sharma <vipinsh@google.com>
7 */
8
9 #include <linux/limits.h>
10 #include <linux/cgroup.h>
11 #include <linux/errno.h>
12 #include <linux/atomic.h>
13 #include <linux/slab.h>
14 #include <linux/misc_cgroup.h>
15
/* Keyword that misc_cg_max_write() accepts as a limit meaning "no limit". */
#define MAX_STR "max"
/*
 * Numeric limit stored when "max" is written; also the default limit that
 * misc_cg_alloc() assigns to every resource of a new cgroup.
 */
#define MAX_NUM U64_MAX
18
/*
 * Miscellaneous res name, keep it in sync with enum misc_res_type.
 *
 * The table is indexed by resource type everywhere in this file, so the
 * entries (and the #ifdef blocks guarding them) must appear in the same
 * order as the enum constants.
 */
static const char *const misc_res_name[] = {
#ifdef CONFIG_KVM_AMD_SEV
	/* AMD SEV ASIDs resource */
	"sev",
	/* AMD SEV-ES ASIDs resource */
	"sev_es",
#endif
#ifdef CONFIG_INTEL_TDX_HOST
	/* Intel TDX HKIDs resource */
	"tdx",
#endif
};
32
/*
 * Root misc cgroup. Statically allocated; misc_cg_alloc() returns its css
 * when it is called without a parent css.
 */
static struct misc_cg root_cg;
35
/*
 * Miscellaneous resources capacity for the entire machine. 0 capacity means
 * resource is not initialized or not present in the host.
 *
 * root_cg.max and capacity are independent of each other. root_cg.max can be
 * more than the actual capacity. We are using the Limits resource distribution
 * model of cgroup for the miscellaneous controller.
 *
 * Indexed by enum misc_res_type. Entries are written with WRITE_ONCE() in
 * misc_cg_set_capacity() and read with READ_ONCE() everywhere else in this
 * file.
 */
static u64 misc_res_capacity[MISC_CG_RES_TYPES];
45
46 /**
47 * parent_misc() - Get the parent of the passed misc cgroup.
48 * @cgroup: cgroup whose parent needs to be fetched.
49 *
50 * Context: Any context.
51 * Return:
52 * * struct misc_cg* - Parent of the @cgroup.
53 * * %NULL - If @cgroup is null or the passed cgroup does not have a parent.
54 */
parent_misc(struct misc_cg * cgroup)55 static struct misc_cg *parent_misc(struct misc_cg *cgroup)
56 {
57 return cgroup ? css_misc(cgroup->css.parent) : NULL;
58 }
59
60 /**
61 * valid_type() - Check if @type is valid or not.
62 * @type: misc res type.
63 *
64 * Context: Any context.
65 * Return:
66 * * true - If valid type.
67 * * false - If not valid type.
68 */
valid_type(enum misc_res_type type)69 static inline bool valid_type(enum misc_res_type type)
70 {
71 return type >= 0 && type < MISC_CG_RES_TYPES;
72 }
73
74 /**
75 * misc_cg_set_capacity() - Set the capacity of the misc cgroup res.
76 * @type: Type of the misc res.
77 * @capacity: Supported capacity of the misc res on the host.
78 *
79 * If capacity is 0 then the charging a misc cgroup fails for that type.
80 *
81 * Context: Any context.
82 * Return:
83 * * %0 - Successfully registered the capacity.
84 * * %-EINVAL - If @type is invalid.
85 */
misc_cg_set_capacity(enum misc_res_type type,u64 capacity)86 int misc_cg_set_capacity(enum misc_res_type type, u64 capacity)
87 {
88 if (!valid_type(type))
89 return -EINVAL;
90
91 WRITE_ONCE(misc_res_capacity[type], capacity);
92 return 0;
93 }
94 EXPORT_SYMBOL_GPL(misc_cg_set_capacity);
95
96 /**
97 * misc_cg_cancel_charge() - Cancel the charge from the misc cgroup.
98 * @type: Misc res type in misc cg to cancel the charge from.
99 * @cg: Misc cgroup to cancel charge from.
100 * @amount: Amount to cancel.
101 *
102 * Context: Any context.
103 */
misc_cg_cancel_charge(enum misc_res_type type,struct misc_cg * cg,u64 amount)104 static void misc_cg_cancel_charge(enum misc_res_type type, struct misc_cg *cg,
105 u64 amount)
106 {
107 WARN_ONCE(atomic64_add_negative(-amount, &cg->res[type].usage),
108 "misc cgroup resource %s became less than 0",
109 misc_res_name[type]);
110 }
111
/*
 * Raise @res->watermark to @new_usage unless it is already at least that
 * high. The compare-and-exchange is retried whenever the watermark changed
 * between the read and the exchange.
 */
static void misc_cg_update_watermark(struct misc_res *res, u64 new_usage)
{
	u64 seen;

	for (;;) {
		seen = atomic64_read(&res->watermark);
		if (seen >= new_usage)
			return;

		if (atomic64_cmpxchg(&res->watermark, seen, new_usage) == seen)
			return;
	}
}
124
misc_cg_event(enum misc_res_type type,struct misc_cg * cg)125 static void misc_cg_event(enum misc_res_type type, struct misc_cg *cg)
126 {
127 atomic64_inc(&cg->res[type].events_local);
128 cgroup_file_notify(&cg->events_local_file);
129
130 for (; parent_misc(cg); cg = parent_misc(cg)) {
131 atomic64_inc(&cg->res[type].events);
132 cgroup_file_notify(&cg->events_file);
133 }
134 }
135
136 /**
137 * misc_cg_try_charge() - Try charging the misc cgroup.
138 * @type: Misc res type to charge.
139 * @cg: Misc cgroup which will be charged.
140 * @amount: Amount to charge.
141 *
142 * Charge @amount to the misc cgroup. Caller must use the same cgroup during
143 * the uncharge call.
144 *
145 * Context: Any context.
146 * Return:
147 * * %0 - If successfully charged.
148 * * -EINVAL - If @type is invalid or misc res has 0 capacity.
149 * * -EBUSY - If max limit will be crossed or total usage will be more than the
150 * capacity.
151 */
misc_cg_try_charge(enum misc_res_type type,struct misc_cg * cg,u64 amount)152 int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount)
153 {
154 struct misc_cg *i, *j;
155 int ret;
156 struct misc_res *res;
157 u64 new_usage;
158
159 if (!(valid_type(type) && cg && READ_ONCE(misc_res_capacity[type])))
160 return -EINVAL;
161
162 if (!amount)
163 return 0;
164
165 for (i = cg; i; i = parent_misc(i)) {
166 res = &i->res[type];
167
168 new_usage = atomic64_add_return(amount, &res->usage);
169 if (new_usage > READ_ONCE(res->max) ||
170 new_usage > READ_ONCE(misc_res_capacity[type])) {
171 ret = -EBUSY;
172 goto err_charge;
173 }
174 misc_cg_update_watermark(res, new_usage);
175 }
176 return 0;
177
178 err_charge:
179 misc_cg_event(type, i);
180
181 for (j = cg; j != i; j = parent_misc(j))
182 misc_cg_cancel_charge(type, j, amount);
183 misc_cg_cancel_charge(type, i, amount);
184 return ret;
185 }
186 EXPORT_SYMBOL_GPL(misc_cg_try_charge);
187
188 /**
189 * misc_cg_uncharge() - Uncharge the misc cgroup.
190 * @type: Misc res type which was charged.
191 * @cg: Misc cgroup which will be uncharged.
192 * @amount: Charged amount.
193 *
194 * Context: Any context.
195 */
misc_cg_uncharge(enum misc_res_type type,struct misc_cg * cg,u64 amount)196 void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount)
197 {
198 struct misc_cg *i;
199
200 if (!(amount && valid_type(type) && cg))
201 return;
202
203 for (i = cg; i; i = parent_misc(i))
204 misc_cg_cancel_charge(type, i, amount);
205 }
206 EXPORT_SYMBOL_GPL(misc_cg_uncharge);
207
208 /**
209 * misc_cg_max_show() - Show the misc cgroup max limit.
210 * @sf: Interface file
211 * @v: Arguments passed
212 *
213 * Context: Any context.
214 * Return: 0 to denote successful print.
215 */
misc_cg_max_show(struct seq_file * sf,void * v)216 static int misc_cg_max_show(struct seq_file *sf, void *v)
217 {
218 int i;
219 struct misc_cg *cg = css_misc(seq_css(sf));
220 u64 max;
221
222 for (i = 0; i < MISC_CG_RES_TYPES; i++) {
223 if (READ_ONCE(misc_res_capacity[i])) {
224 max = READ_ONCE(cg->res[i].max);
225 if (max == MAX_NUM)
226 seq_printf(sf, "%s max\n", misc_res_name[i]);
227 else
228 seq_printf(sf, "%s %llu\n", misc_res_name[i],
229 max);
230 }
231 }
232
233 return 0;
234 }
235
236 /**
237 * misc_cg_max_write() - Update the maximum limit of the cgroup.
238 * @of: Handler for the file.
239 * @buf: Data from the user. It should be either "max", 0, or a positive
240 * integer.
241 * @nbytes: Number of bytes of the data.
242 * @off: Offset in the file.
243 *
244 * User can pass data like:
245 * echo sev 23 > misc.max, OR
246 * echo sev max > misc.max
247 *
248 * Context: Any context.
249 * Return:
250 * * >= 0 - Number of bytes processed in the input.
251 * * -EINVAL - If buf is not valid.
252 * * -ERANGE - If number is bigger than the u64 capacity.
253 */
misc_cg_max_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)254 static ssize_t misc_cg_max_write(struct kernfs_open_file *of, char *buf,
255 size_t nbytes, loff_t off)
256 {
257 struct misc_cg *cg;
258 u64 max;
259 int ret = 0, i;
260 enum misc_res_type type = MISC_CG_RES_TYPES;
261 char *token;
262
263 buf = strstrip(buf);
264 token = strsep(&buf, " ");
265
266 if (!token || !buf)
267 return -EINVAL;
268
269 for (i = 0; i < MISC_CG_RES_TYPES; i++) {
270 if (!strcmp(misc_res_name[i], token)) {
271 type = i;
272 break;
273 }
274 }
275
276 if (type == MISC_CG_RES_TYPES)
277 return -EINVAL;
278
279 if (!strcmp(MAX_STR, buf)) {
280 max = MAX_NUM;
281 } else {
282 ret = kstrtou64(buf, 0, &max);
283 if (ret)
284 return ret;
285 }
286
287 cg = css_misc(of_css(of));
288
289 if (READ_ONCE(misc_res_capacity[type]))
290 WRITE_ONCE(cg->res[type].max, max);
291 else
292 ret = -EINVAL;
293
294 return ret ? ret : nbytes;
295 }
296
297 /**
298 * misc_cg_current_show() - Show the current usage of the misc cgroup.
299 * @sf: Interface file
300 * @v: Arguments passed
301 *
302 * Context: Any context.
303 * Return: 0 to denote successful print.
304 */
misc_cg_current_show(struct seq_file * sf,void * v)305 static int misc_cg_current_show(struct seq_file *sf, void *v)
306 {
307 int i;
308 u64 usage;
309 struct misc_cg *cg = css_misc(seq_css(sf));
310
311 for (i = 0; i < MISC_CG_RES_TYPES; i++) {
312 usage = atomic64_read(&cg->res[i].usage);
313 if (READ_ONCE(misc_res_capacity[i]) || usage)
314 seq_printf(sf, "%s %llu\n", misc_res_name[i], usage);
315 }
316
317 return 0;
318 }
319
320 /**
321 * misc_cg_peak_show() - Show the peak usage of the misc cgroup.
322 * @sf: Interface file
323 * @v: Arguments passed
324 *
325 * Context: Any context.
326 * Return: 0 to denote successful print.
327 */
misc_cg_peak_show(struct seq_file * sf,void * v)328 static int misc_cg_peak_show(struct seq_file *sf, void *v)
329 {
330 int i;
331 u64 watermark;
332 struct misc_cg *cg = css_misc(seq_css(sf));
333
334 for (i = 0; i < MISC_CG_RES_TYPES; i++) {
335 watermark = atomic64_read(&cg->res[i].watermark);
336 if (READ_ONCE(misc_res_capacity[i]) || watermark)
337 seq_printf(sf, "%s %llu\n", misc_res_name[i], watermark);
338 }
339
340 return 0;
341 }
342
343 /**
344 * misc_cg_capacity_show() - Show the total capacity of misc res on the host.
345 * @sf: Interface file
346 * @v: Arguments passed
347 *
348 * Only present in the root cgroup directory.
349 *
350 * Context: Any context.
351 * Return: 0 to denote successful print.
352 */
misc_cg_capacity_show(struct seq_file * sf,void * v)353 static int misc_cg_capacity_show(struct seq_file *sf, void *v)
354 {
355 int i;
356 u64 cap;
357
358 for (i = 0; i < MISC_CG_RES_TYPES; i++) {
359 cap = READ_ONCE(misc_res_capacity[i]);
360 if (cap)
361 seq_printf(sf, "%s %llu\n", misc_res_name[i], cap);
362 }
363
364 return 0;
365 }
366
/*
 * Common body of the misc.events and misc.events.local show handlers.
 *
 * Prints "<name>.max <count>" for every resource that has non-zero capacity
 * or a non-zero count. @local selects the events_local counter; otherwise the
 * events counter is used.
 */
static int __misc_events_show(struct seq_file *sf, bool local)
{
	struct misc_cg *cg = css_misc(seq_css(sf));
	u64 count;
	int type;

	for (type = 0; type < MISC_CG_RES_TYPES; type++) {
		count = atomic64_read(local ? &cg->res[type].events_local
					    : &cg->res[type].events);
		if (!READ_ONCE(misc_res_capacity[type]) && !count)
			continue;

		seq_printf(sf, "%s.max %llu\n", misc_res_name[type], count);
	}

	return 0;
}
383
misc_events_show(struct seq_file * sf,void * v)384 static int misc_events_show(struct seq_file *sf, void *v)
385 {
386 return __misc_events_show(sf, false);
387 }
388
misc_events_local_show(struct seq_file * sf,void * v)389 static int misc_events_local_show(struct seq_file *sf, void *v)
390 {
391 return __misc_events_show(sf, true);
392 }
393
394 /* Misc cgroup interface files */
395 static struct cftype misc_cg_files[] = {
396 {
397 .name = "max",
398 .write = misc_cg_max_write,
399 .seq_show = misc_cg_max_show,
400 .flags = CFTYPE_NOT_ON_ROOT,
401 },
402 {
403 .name = "current",
404 .seq_show = misc_cg_current_show,
405 },
406 {
407 .name = "peak",
408 .seq_show = misc_cg_peak_show,
409 },
410 {
411 .name = "capacity",
412 .seq_show = misc_cg_capacity_show,
413 .flags = CFTYPE_ONLY_ON_ROOT,
414 },
415 {
416 .name = "events",
417 .flags = CFTYPE_NOT_ON_ROOT,
418 .file_offset = offsetof(struct misc_cg, events_file),
419 .seq_show = misc_events_show,
420 },
421 {
422 .name = "events.local",
423 .flags = CFTYPE_NOT_ON_ROOT,
424 .file_offset = offsetof(struct misc_cg, events_local_file),
425 .seq_show = misc_events_local_show,
426 },
427 {}
428 };
429
430 /**
431 * misc_cg_alloc() - Allocate misc cgroup.
432 * @parent_css: Parent cgroup.
433 *
434 * Context: Process context.
435 * Return:
436 * * struct cgroup_subsys_state* - css of the allocated cgroup.
437 * * ERR_PTR(-ENOMEM) - No memory available to allocate.
438 */
439 static struct cgroup_subsys_state *
misc_cg_alloc(struct cgroup_subsys_state * parent_css)440 misc_cg_alloc(struct cgroup_subsys_state *parent_css)
441 {
442 enum misc_res_type i;
443 struct misc_cg *cg;
444
445 if (!parent_css) {
446 cg = &root_cg;
447 } else {
448 cg = kzalloc(sizeof(*cg), GFP_KERNEL);
449 if (!cg)
450 return ERR_PTR(-ENOMEM);
451 }
452
453 for (i = 0; i < MISC_CG_RES_TYPES; i++) {
454 WRITE_ONCE(cg->res[i].max, MAX_NUM);
455 atomic64_set(&cg->res[i].usage, 0);
456 }
457
458 return &cg->css;
459 }
460
/**
 * misc_cg_free() - Free the misc cgroup.
 * @css: cgroup subsys object embedded in the misc cgroup to release.
 *
 * Context: Any context.
 */
static void misc_cg_free(struct cgroup_subsys_state *css)
{
	struct misc_cg *cg = css_misc(css);

	kfree(cg);
}
471
472 /* Cgroup controller callbacks */
473 struct cgroup_subsys misc_cgrp_subsys = {
474 .css_alloc = misc_cg_alloc,
475 .css_free = misc_cg_free,
476 .legacy_cftypes = misc_cg_files,
477 .dfl_cftypes = misc_cg_files,
478 };
479