1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * CPU accounting code for task groups.
5 *
6 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
7 * (balbir@in.ibm.com).
8 */
9 #include <linux/sched/cputime.h>
10 #include "sched.h"
11
/*
 * Index of each statistic kept per CPU accounting group: time spent by the
 * tasks of the group executing in ...
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	/*
	 * Number of real statistics above.  cpuacct_cpuusage_read() also
	 * accepts this value as an index meaning "the cpuusage counter".
	 */
	CPUACCT_STAT_NSTATS,
};
19
/*
 * Printable name of each statistic, indexed by enum cpuacct_stat_index.
 * Used as column headers in "usage_all" and as row labels in "stat".
 */
static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};
24
/* track CPU usage of a group of tasks and its child groups */
struct cpuacct {
	/* Embedded css; css_ca() recovers the cpuacct via container_of() */
	struct cgroup_subsys_state css;
	/* cpuusage holds pointer to a u64-type object on every CPU */
	u64 __percpu *cpuusage;
	/*
	 * Per-CPU cpustat[] array; this file reads the USER, NICE, SYSTEM,
	 * IRQ and SOFTIRQ entries to build the user/system statistics.
	 */
	struct kernel_cpustat __percpu *cpustat;
};
32
css_ca(struct cgroup_subsys_state * css)33 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
34 {
35 return css ? container_of(css, struct cpuacct, css) : NULL;
36 }
37
38 /* Return CPU accounting group to which this task belongs */
task_ca(struct task_struct * tsk)39 static inline struct cpuacct *task_ca(struct task_struct *tsk)
40 {
41 return css_ca(task_css(tsk, cpuacct_cgrp_id));
42 }
43
parent_ca(struct cpuacct * ca)44 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
45 {
46 return css_ca(ca->css.parent);
47 }
48
/* Per-CPU usage counter backing the statically allocated root group. */
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
/*
 * The root accounting group.  It is returned by cpuacct_css_alloc() when
 * there is no parent css.  Its cpustat aliases the global kernel_cpustat,
 * which is why cpuacct_cpuusage_write() refuses to reset it.
 */
static struct cpuacct root_cpuacct = {
	.cpustat = &kernel_cpustat,
	.cpuusage = &root_cpuacct_cpuusage,
};
54
55 /* Create a new CPU accounting group */
56 static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state * parent_css)57 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
58 {
59 struct cpuacct *ca;
60
61 if (!parent_css)
62 return &root_cpuacct.css;
63
64 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
65 if (!ca)
66 goto out;
67
68 ca->cpuusage = alloc_percpu(u64);
69 if (!ca->cpuusage)
70 goto out_free_ca;
71
72 ca->cpustat = alloc_percpu(struct kernel_cpustat);
73 if (!ca->cpustat)
74 goto out_free_cpuusage;
75
76 return &ca->css;
77
78 out_free_cpuusage:
79 free_percpu(ca->cpuusage);
80 out_free_ca:
81 kfree(ca);
82 out:
83 return ERR_PTR(-ENOMEM);
84 }
85
86 /* Destroy an existing CPU accounting group */
cpuacct_css_free(struct cgroup_subsys_state * css)87 static void cpuacct_css_free(struct cgroup_subsys_state *css)
88 {
89 struct cpuacct *ca = css_ca(css);
90
91 free_percpu(ca->cpustat);
92 free_percpu(ca->cpuusage);
93 kfree(ca);
94 }
95
cpuacct_cpuusage_read(struct cpuacct * ca,int cpu,enum cpuacct_stat_index index)96 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
97 enum cpuacct_stat_index index)
98 {
99 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
100 u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
101 u64 data;
102
103 /*
104 * We allow index == CPUACCT_STAT_NSTATS here to read
105 * the sum of usages.
106 */
107 if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS))
108 return 0;
109
110 #ifndef CONFIG_64BIT
111 /*
112 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
113 */
114 raw_spin_rq_lock_irq(cpu_rq(cpu));
115 #endif
116
117 switch (index) {
118 case CPUACCT_STAT_USER:
119 data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
120 break;
121 case CPUACCT_STAT_SYSTEM:
122 data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
123 cpustat[CPUTIME_SOFTIRQ];
124 break;
125 case CPUACCT_STAT_NSTATS:
126 data = *cpuusage;
127 break;
128 }
129
130 #ifndef CONFIG_64BIT
131 raw_spin_rq_unlock_irq(cpu_rq(cpu));
132 #endif
133
134 return data;
135 }
136
cpuacct_cpuusage_write(struct cpuacct * ca,int cpu)137 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
138 {
139 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
140 u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
141
142 /* Don't allow to reset global kernel_cpustat */
143 if (ca == &root_cpuacct)
144 return;
145
146 #ifndef CONFIG_64BIT
147 /*
148 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
149 */
150 raw_spin_rq_lock_irq(cpu_rq(cpu));
151 #endif
152 *cpuusage = 0;
153 cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
154 cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
155 cpustat[CPUTIME_SOFTIRQ] = 0;
156
157 #ifndef CONFIG_64BIT
158 raw_spin_rq_unlock_irq(cpu_rq(cpu));
159 #endif
160 }
161
162 /* Return total CPU usage (in nanoseconds) of a group */
__cpuusage_read(struct cgroup_subsys_state * css,enum cpuacct_stat_index index)163 static u64 __cpuusage_read(struct cgroup_subsys_state *css,
164 enum cpuacct_stat_index index)
165 {
166 struct cpuacct *ca = css_ca(css);
167 u64 totalcpuusage = 0;
168 int i;
169
170 for_each_possible_cpu(i)
171 totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
172
173 return totalcpuusage;
174 }
175
cpuusage_user_read(struct cgroup_subsys_state * css,struct cftype * cft)176 static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
177 struct cftype *cft)
178 {
179 return __cpuusage_read(css, CPUACCT_STAT_USER);
180 }
181
cpuusage_sys_read(struct cgroup_subsys_state * css,struct cftype * cft)182 static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
183 struct cftype *cft)
184 {
185 return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
186 }
187
cpuusage_read(struct cgroup_subsys_state * css,struct cftype * cft)188 static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
189 {
190 return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
191 }
192
/*
 * write_u64 handler for "usage".  The only accepted value is 0, which resets
 * the group's accounting on every possible CPU; anything else is rejected
 * with -EINVAL.
 */
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	struct cpuacct *ca = css_ca(css);
	int i;

	/* Writing is only meaningful as a reset request. */
	if (val != 0)
		return -EINVAL;

	for_each_possible_cpu(i) {
		cpuacct_cpuusage_write(ca, i);
	}

	return 0;
}
210
__cpuacct_percpu_seq_show(struct seq_file * m,enum cpuacct_stat_index index)211 static int __cpuacct_percpu_seq_show(struct seq_file *m,
212 enum cpuacct_stat_index index)
213 {
214 struct cpuacct *ca = css_ca(seq_css(m));
215 u64 percpu;
216 int i;
217
218 for_each_possible_cpu(i) {
219 percpu = cpuacct_cpuusage_read(ca, i, index);
220 seq_printf(m, "%llu ", (unsigned long long) percpu);
221 }
222 seq_printf(m, "\n");
223 return 0;
224 }
225
cpuacct_percpu_user_seq_show(struct seq_file * m,void * V)226 static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
227 {
228 return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
229 }
230
cpuacct_percpu_sys_seq_show(struct seq_file * m,void * V)231 static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
232 {
233 return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
234 }
235
cpuacct_percpu_seq_show(struct seq_file * m,void * V)236 static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
237 {
238 return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
239 }
240
cpuacct_all_seq_show(struct seq_file * m,void * V)241 static int cpuacct_all_seq_show(struct seq_file *m, void *V)
242 {
243 struct cpuacct *ca = css_ca(seq_css(m));
244 int index;
245 int cpu;
246
247 seq_puts(m, "cpu");
248 for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
249 seq_printf(m, " %s", cpuacct_stat_desc[index]);
250 seq_puts(m, "\n");
251
252 for_each_possible_cpu(cpu) {
253 seq_printf(m, "%d", cpu);
254 for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
255 seq_printf(m, " %llu",
256 cpuacct_cpuusage_read(ca, cpu, index));
257 seq_puts(m, "\n");
258 }
259 return 0;
260 }
261
cpuacct_stats_show(struct seq_file * sf,void * v)262 static int cpuacct_stats_show(struct seq_file *sf, void *v)
263 {
264 struct cpuacct *ca = css_ca(seq_css(sf));
265 struct task_cputime cputime;
266 u64 val[CPUACCT_STAT_NSTATS];
267 int cpu;
268 int stat;
269
270 memset(&cputime, 0, sizeof(cputime));
271 for_each_possible_cpu(cpu) {
272 u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
273
274 cputime.utime += cpustat[CPUTIME_USER];
275 cputime.utime += cpustat[CPUTIME_NICE];
276 cputime.stime += cpustat[CPUTIME_SYSTEM];
277 cputime.stime += cpustat[CPUTIME_IRQ];
278 cputime.stime += cpustat[CPUTIME_SOFTIRQ];
279
280 cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu);
281 }
282
283 cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime,
284 &val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]);
285
286 for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
287 seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat],
288 nsec_to_clock_t(val[stat]));
289 }
290
291 return 0;
292 }
293
/*
 * Control files of the cpuacct controller, registered through
 * cpuacct_cgrp_subsys.legacy_cftypes.
 */
static struct cftype files[] = {
	{
		/* Total usage over all CPUs; writing 0 resets the group. */
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		/* User-mode total over all CPUs. */
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		/* Kernel-mode total over all CPUs. */
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
	{
		/* Usage counter of each CPU, on one line. */
		.name = "usage_percpu",
		.seq_show = cpuacct_percpu_seq_show,
	},
	{
		/* User-mode value of each CPU, on one line. */
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		/* Kernel-mode value of each CPU, on one line. */
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	{
		/* Table with one row per CPU and one column per statistic. */
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	{
		/* Adjusted user/system totals, converted by nsec_to_clock_t(). */
		.name = "stat",
		.seq_show = cpuacct_stats_show,
	},
	{ }	/* terminate */
};
330
331 /*
332 * charge this task's execution time to its accounting group.
333 *
334 * called with rq->lock held.
335 */
cpuacct_charge(struct task_struct * tsk,u64 cputime)336 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
337 {
338 unsigned int cpu = task_cpu(tsk);
339 struct cpuacct *ca;
340
341 lockdep_assert_rq_held(cpu_rq(cpu));
342
343 for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
344 *per_cpu_ptr(ca->cpuusage, cpu) += cputime;
345 }
346
347 /*
348 * Add user/system time to cpuacct.
349 *
350 * Note: it's the caller that updates the account of the root cgroup.
351 */
cpuacct_account_field(struct task_struct * tsk,int index,u64 val)352 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
353 {
354 struct cpuacct *ca;
355
356 for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
357 __this_cpu_add(ca->cpustat->cpustat[index], val);
358 }
359
/* cgroup subsystem descriptor wiring the cpuacct callbacks and files. */
struct cgroup_subsys cpuacct_cgrp_subsys = {
	/* Allocate/free the per-group struct cpuacct. */
	.css_alloc = cpuacct_css_alloc,
	.css_free = cpuacct_css_free,
	/* Control files defined in files[]. */
	.legacy_cftypes = files,
	/* NOTE(review): presumably requests early boot-time init; confirm against cgroup core. */
	.early_init = true,
};
366