xref: /linux/kernel/sched/cpuacct.c (revision 8a5f956a9fb7d74fff681145082acfad5afa6bb8)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * CPU accounting code for task groups.
5  *
6  * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
7  * (balbir@in.ibm.com).
8  */
9 #include <linux/sched/cputime.h>
10 #include "sched.h"
11 
/*
 * Statistics exported per accounting group: time spent by the tasks of the
 * group executing in ...
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	/* Number of statistics above; keep this entry last */
	CPUACCT_STAT_NSTATS,
};

/*
 * Printable name of each statistic, indexed by enum cpuacct_stat_index.
 *
 * The array is sized explicitly so that a statistic added to the enum
 * without a matching name yields a NULL entry instead of making loops
 * bounded by CPUACCT_STAT_NSTATS read past the end of the table.
 */
static const char * const cpuacct_stat_desc[CPUACCT_STAT_NSTATS] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};
24 
25 /* track CPU usage of a group of tasks and its child groups */
26 struct cpuacct {
27 	struct cgroup_subsys_state	css;
28 	/* cpuusage holds pointer to a u64-type object on every CPU */
29 	u64 __percpu	*cpuusage;
30 	struct kernel_cpustat __percpu	*cpustat;
31 };
32 
33 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
34 {
35 	return css ? container_of(css, struct cpuacct, css) : NULL;
36 }
37 
38 /* Return CPU accounting group to which this task belongs */
39 static inline struct cpuacct *task_ca(struct task_struct *tsk)
40 {
41 	return css_ca(task_css(tsk, cpuacct_cgrp_id));
42 }
43 
44 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
45 {
46 	return css_ca(ca->css.parent);
47 }
48 
49 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
50 static struct cpuacct root_cpuacct = {
51 	.cpustat	= &kernel_cpustat,
52 	.cpuusage	= &root_cpuacct_cpuusage,
53 };
54 
55 /* Create a new CPU accounting group */
56 static struct cgroup_subsys_state *
57 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
58 {
59 	struct cpuacct *ca;
60 
61 	if (!parent_css)
62 		return &root_cpuacct.css;
63 
64 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
65 	if (!ca)
66 		goto out;
67 
68 	ca->cpuusage = alloc_percpu(u64);
69 	if (!ca->cpuusage)
70 		goto out_free_ca;
71 
72 	ca->cpustat = alloc_percpu(struct kernel_cpustat);
73 	if (!ca->cpustat)
74 		goto out_free_cpuusage;
75 
76 	return &ca->css;
77 
78 out_free_cpuusage:
79 	free_percpu(ca->cpuusage);
80 out_free_ca:
81 	kfree(ca);
82 out:
83 	return ERR_PTR(-ENOMEM);
84 }
85 
86 /* Destroy an existing CPU accounting group */
87 static void cpuacct_css_free(struct cgroup_subsys_state *css)
88 {
89 	struct cpuacct *ca = css_ca(css);
90 
91 	free_percpu(ca->cpustat);
92 	free_percpu(ca->cpuusage);
93 	kfree(ca);
94 }
95 
96 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
97 				 enum cpuacct_stat_index index)
98 {
99 	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
100 	u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
101 	u64 data;
102 
103 	/*
104 	 * We allow index == CPUACCT_STAT_NSTATS here to read
105 	 * the sum of usages.
106 	 */
107 	if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS))
108 		return 0;
109 
110 #ifndef CONFIG_64BIT
111 	/*
112 	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
113 	 */
114 	raw_spin_rq_lock_irq(cpu_rq(cpu));
115 #endif
116 
117 	switch (index) {
118 	case CPUACCT_STAT_USER:
119 		data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
120 		break;
121 	case CPUACCT_STAT_SYSTEM:
122 		data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
123 			cpustat[CPUTIME_SOFTIRQ];
124 		break;
125 	case CPUACCT_STAT_NSTATS:
126 		data = *cpuusage;
127 		break;
128 	}
129 
130 #ifndef CONFIG_64BIT
131 	raw_spin_rq_unlock_irq(cpu_rq(cpu));
132 #endif
133 
134 	return data;
135 }
136 
137 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
138 {
139 	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
140 	u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
141 
142 	/* Don't allow to reset global kernel_cpustat */
143 	if (ca == &root_cpuacct)
144 		return;
145 
146 #ifndef CONFIG_64BIT
147 	/*
148 	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
149 	 */
150 	raw_spin_rq_lock_irq(cpu_rq(cpu));
151 #endif
152 	*cpuusage = 0;
153 	cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
154 	cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
155 	cpustat[CPUTIME_SOFTIRQ] = 0;
156 
157 #ifndef CONFIG_64BIT
158 	raw_spin_rq_unlock_irq(cpu_rq(cpu));
159 #endif
160 }
161 
162 /* Return total CPU usage (in nanoseconds) of a group */
163 static u64 __cpuusage_read(struct cgroup_subsys_state *css,
164 			   enum cpuacct_stat_index index)
165 {
166 	struct cpuacct *ca = css_ca(css);
167 	u64 totalcpuusage = 0;
168 	int i;
169 
170 	for_each_possible_cpu(i)
171 		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
172 
173 	return totalcpuusage;
174 }
175 
176 static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
177 			      struct cftype *cft)
178 {
179 	return __cpuusage_read(css, CPUACCT_STAT_USER);
180 }
181 
182 static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
183 			     struct cftype *cft)
184 {
185 	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
186 }
187 
188 static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
189 {
190 	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
191 }
192 
193 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
194 			  u64 val)
195 {
196 	struct cpuacct *ca = css_ca(css);
197 	int cpu;
198 
199 	/*
200 	 * Only allow '0' here to do a reset.
201 	 */
202 	if (val)
203 		return -EINVAL;
204 
205 	for_each_possible_cpu(cpu)
206 		cpuacct_cpuusage_write(ca, cpu);
207 
208 	return 0;
209 }
210 
211 static int __cpuacct_percpu_seq_show(struct seq_file *m,
212 				     enum cpuacct_stat_index index)
213 {
214 	struct cpuacct *ca = css_ca(seq_css(m));
215 	u64 percpu;
216 	int i;
217 
218 	for_each_possible_cpu(i) {
219 		percpu = cpuacct_cpuusage_read(ca, i, index);
220 		seq_printf(m, "%llu ", (unsigned long long) percpu);
221 	}
222 	seq_printf(m, "\n");
223 	return 0;
224 }
225 
226 static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
227 {
228 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
229 }
230 
231 static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
232 {
233 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
234 }
235 
236 static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
237 {
238 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
239 }
240 
241 static int cpuacct_all_seq_show(struct seq_file *m, void *V)
242 {
243 	struct cpuacct *ca = css_ca(seq_css(m));
244 	int index;
245 	int cpu;
246 
247 	seq_puts(m, "cpu");
248 	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
249 		seq_printf(m, " %s", cpuacct_stat_desc[index]);
250 	seq_puts(m, "\n");
251 
252 	for_each_possible_cpu(cpu) {
253 		seq_printf(m, "%d", cpu);
254 		for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
255 			seq_printf(m, " %llu",
256 				   cpuacct_cpuusage_read(ca, cpu, index));
257 		seq_puts(m, "\n");
258 	}
259 	return 0;
260 }
261 
262 static int cpuacct_stats_show(struct seq_file *sf, void *v)
263 {
264 	struct cpuacct *ca = css_ca(seq_css(sf));
265 	struct task_cputime cputime;
266 	u64 val[CPUACCT_STAT_NSTATS];
267 	int cpu;
268 	int stat;
269 
270 	memset(&cputime, 0, sizeof(cputime));
271 	for_each_possible_cpu(cpu) {
272 		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
273 
274 		cputime.utime += cpustat[CPUTIME_USER];
275 		cputime.utime += cpustat[CPUTIME_NICE];
276 		cputime.stime += cpustat[CPUTIME_SYSTEM];
277 		cputime.stime += cpustat[CPUTIME_IRQ];
278 		cputime.stime += cpustat[CPUTIME_SOFTIRQ];
279 
280 		cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu);
281 	}
282 
283 	cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime,
284 		&val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]);
285 
286 	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
287 		seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat],
288 			nsec_to_clock_t(val[stat]));
289 	}
290 
291 	return 0;
292 }
293 
294 static struct cftype files[] = {
295 	{
296 		.name = "usage",
297 		.read_u64 = cpuusage_read,
298 		.write_u64 = cpuusage_write,
299 	},
300 	{
301 		.name = "usage_user",
302 		.read_u64 = cpuusage_user_read,
303 	},
304 	{
305 		.name = "usage_sys",
306 		.read_u64 = cpuusage_sys_read,
307 	},
308 	{
309 		.name = "usage_percpu",
310 		.seq_show = cpuacct_percpu_seq_show,
311 	},
312 	{
313 		.name = "usage_percpu_user",
314 		.seq_show = cpuacct_percpu_user_seq_show,
315 	},
316 	{
317 		.name = "usage_percpu_sys",
318 		.seq_show = cpuacct_percpu_sys_seq_show,
319 	},
320 	{
321 		.name = "usage_all",
322 		.seq_show = cpuacct_all_seq_show,
323 	},
324 	{
325 		.name = "stat",
326 		.seq_show = cpuacct_stats_show,
327 	},
328 	{ }	/* terminate */
329 };
330 
331 /*
332  * charge this task's execution time to its accounting group.
333  *
334  * called with rq->lock held.
335  */
336 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
337 {
338 	unsigned int cpu = task_cpu(tsk);
339 	struct cpuacct *ca;
340 
341 	lockdep_assert_rq_held(cpu_rq(cpu));
342 
343 	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
344 		*per_cpu_ptr(ca->cpuusage, cpu) += cputime;
345 }
346 
347 /*
348  * Add user/system time to cpuacct.
349  *
350  * Note: it's the caller that updates the account of the root cgroup.
351  */
352 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
353 {
354 	struct cpuacct *ca;
355 
356 	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
357 		__this_cpu_add(ca->cpustat->cpustat[index], val);
358 }
359 
360 struct cgroup_subsys cpuacct_cgrp_subsys = {
361 	.css_alloc	= cpuacct_css_alloc,
362 	.css_free	= cpuacct_css_free,
363 	.legacy_cftypes	= files,
364 	.early_init	= true,
365 };
366