xref: /linux/kernel/delayacct.c (revision af13ff1c33e043b746cd96c83c7660ddf0272f73)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* delayacct.c - per-task delay accounting
3  *
4  * Copyright (C) Shailabh Nagar, IBM Corp. 2006
5  */
6 
7 #include <linux/sched.h>
8 #include <linux/sched/task.h>
9 #include <linux/sched/cputime.h>
10 #include <linux/sched/clock.h>
11 #include <linux/slab.h>
12 #include <linux/taskstats.h>
13 #include <linux/sysctl.h>
14 #include <linux/delayacct.h>
15 #include <linux/module.h>
16 
/* Static branch key; set_delayacct() flips it together with delayacct_on. */
DEFINE_STATIC_KEY_FALSE(delayacct_key);
int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
/* Slab cache from which __delayacct_tsk_init() allocates tsk->delays. */
struct kmem_cache *delayacct_cache;
20 
21 static void set_delayacct(bool enabled)
22 {
23 	if (enabled) {
24 		static_branch_enable(&delayacct_key);
25 		delayacct_on = 1;
26 	} else {
27 		delayacct_on = 0;
28 		static_branch_disable(&delayacct_key);
29 	}
30 }
31 
32 static int __init delayacct_setup_enable(char *str)
33 {
34 	delayacct_on = 1;
35 	return 1;
36 }
37 __setup("delayacct", delayacct_setup_enable);
38 
39 void delayacct_init(void)
40 {
41 	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
42 	delayacct_tsk_init(&init_task);
43 	set_delayacct(delayacct_on);
44 }
45 
46 #ifdef CONFIG_PROC_SYSCTL
47 static int sysctl_delayacct(const struct ctl_table *table, int write, void *buffer,
48 		     size_t *lenp, loff_t *ppos)
49 {
50 	int state = delayacct_on;
51 	struct ctl_table t;
52 	int err;
53 
54 	if (write && !capable(CAP_SYS_ADMIN))
55 		return -EPERM;
56 
57 	t = *table;
58 	t.data = &state;
59 	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
60 	if (err < 0)
61 		return err;
62 	if (write)
63 		set_delayacct(state);
64 	return err;
65 }
66 
67 static const struct ctl_table kern_delayacct_table[] = {
68 	{
69 		.procname       = "task_delayacct",
70 		.data           = NULL,
71 		.maxlen         = sizeof(unsigned int),
72 		.mode           = 0644,
73 		.proc_handler   = sysctl_delayacct,
74 		.extra1         = SYSCTL_ZERO,
75 		.extra2         = SYSCTL_ONE,
76 	},
77 };
78 
79 static __init int kernel_delayacct_sysctls_init(void)
80 {
81 	register_sysctl_init("kernel", kern_delayacct_table);
82 	return 0;
83 }
84 late_initcall(kernel_delayacct_sysctls_init);
85 #endif
86 
87 void __delayacct_tsk_init(struct task_struct *tsk)
88 {
89 	tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
90 	if (tsk->delays)
91 		raw_spin_lock_init(&tsk->delays->lock);
92 }
93 
94 /*
95  * Finish delay accounting for a statistic using its timestamps (@start),
96  * accumulator (@total) and @count
97  */
98 static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min)
99 {
100 	s64 ns = local_clock() - *start;
101 	unsigned long flags;
102 
103 	if (ns > 0) {
104 		raw_spin_lock_irqsave(lock, flags);
105 		*total += ns;
106 		(*count)++;
107 		if (ns > *max)
108 			*max = ns;
109 		if (*min == 0 || ns < *min)
110 			*min = ns;
111 		raw_spin_unlock_irqrestore(lock, flags);
112 	}
113 }
114 
115 void __delayacct_blkio_start(void)
116 {
117 	current->delays->blkio_start = local_clock();
118 }
119 
120 /*
121  * We cannot rely on the `current` macro, as we haven't yet switched back to
122  * the process being woken.
123  */
124 void __delayacct_blkio_end(struct task_struct *p)
125 {
126 	delayacct_end(&p->delays->lock,
127 		      &p->delays->blkio_start,
128 		      &p->delays->blkio_delay,
129 		      &p->delays->blkio_count,
130 		      &p->delays->blkio_delay_max,
131 		      &p->delays->blkio_delay_min);
132 }
133 
134 int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
135 {
136 	u64 utime, stime, stimescaled, utimescaled;
137 	unsigned long long t2, t3;
138 	unsigned long flags, t1;
139 	s64 tmp;
140 
141 	task_cputime(tsk, &utime, &stime);
142 	tmp = (s64)d->cpu_run_real_total;
143 	tmp += utime + stime;
144 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
145 
146 	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
147 	tmp = (s64)d->cpu_scaled_run_real_total;
148 	tmp += utimescaled + stimescaled;
149 	d->cpu_scaled_run_real_total =
150 		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
151 
152 	/*
153 	 * No locking available for sched_info (and too expensive to add one)
154 	 * Mitigate by taking snapshot of values
155 	 */
156 	t1 = tsk->sched_info.pcount;
157 	t2 = tsk->sched_info.run_delay;
158 	t3 = tsk->se.sum_exec_runtime;
159 
160 	d->cpu_count += t1;
161 
162 	d->cpu_delay_max = tsk->sched_info.max_run_delay;
163 	d->cpu_delay_min = tsk->sched_info.min_run_delay;
164 	tmp = (s64)d->cpu_delay_total + t2;
165 	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
166 	tmp = (s64)d->cpu_run_virtual_total + t3;
167 
168 	d->cpu_run_virtual_total =
169 		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
170 
171 	if (!tsk->delays)
172 		return 0;
173 
174 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
175 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
176 	d->blkio_delay_max = tsk->delays->blkio_delay_max;
177 	d->blkio_delay_min = tsk->delays->blkio_delay_min;
178 	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
179 	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
180 	d->swapin_delay_max = tsk->delays->swapin_delay_max;
181 	d->swapin_delay_min = tsk->delays->swapin_delay_min;
182 	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
183 	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
184 	d->freepages_delay_max = tsk->delays->freepages_delay_max;
185 	d->freepages_delay_min = tsk->delays->freepages_delay_min;
186 	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
187 	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
188 	d->thrashing_delay_max = tsk->delays->thrashing_delay_max;
189 	d->thrashing_delay_min = tsk->delays->thrashing_delay_min;
190 	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
191 	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
192 	d->compact_delay_max = tsk->delays->compact_delay_max;
193 	d->compact_delay_min = tsk->delays->compact_delay_min;
194 	tmp = d->compact_delay_total + tsk->delays->compact_delay;
195 	d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
196 	d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max;
197 	d->wpcopy_delay_min = tsk->delays->wpcopy_delay_min;
198 	tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
199 	d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
200 	d->irq_delay_max = tsk->delays->irq_delay_max;
201 	d->irq_delay_min = tsk->delays->irq_delay_min;
202 	tmp = d->irq_delay_total + tsk->delays->irq_delay;
203 	d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
204 	d->blkio_count += tsk->delays->blkio_count;
205 	d->swapin_count += tsk->delays->swapin_count;
206 	d->freepages_count += tsk->delays->freepages_count;
207 	d->thrashing_count += tsk->delays->thrashing_count;
208 	d->compact_count += tsk->delays->compact_count;
209 	d->wpcopy_count += tsk->delays->wpcopy_count;
210 	d->irq_count += tsk->delays->irq_count;
211 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
212 
213 	return 0;
214 }
215 
216 __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
217 {
218 	__u64 ret;
219 	unsigned long flags;
220 
221 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
222 	ret = nsec_to_clock_t(tsk->delays->blkio_delay);
223 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
224 	return ret;
225 }
226 
227 void __delayacct_freepages_start(void)
228 {
229 	current->delays->freepages_start = local_clock();
230 }
231 
232 void __delayacct_freepages_end(void)
233 {
234 	delayacct_end(&current->delays->lock,
235 		      &current->delays->freepages_start,
236 		      &current->delays->freepages_delay,
237 		      &current->delays->freepages_count,
238 		      &current->delays->freepages_delay_max,
239 		      &current->delays->freepages_delay_min);
240 }
241 
242 void __delayacct_thrashing_start(bool *in_thrashing)
243 {
244 	*in_thrashing = !!current->in_thrashing;
245 	if (*in_thrashing)
246 		return;
247 
248 	current->in_thrashing = 1;
249 	current->delays->thrashing_start = local_clock();
250 }
251 
252 void __delayacct_thrashing_end(bool *in_thrashing)
253 {
254 	if (*in_thrashing)
255 		return;
256 
257 	current->in_thrashing = 0;
258 	delayacct_end(&current->delays->lock,
259 		      &current->delays->thrashing_start,
260 		      &current->delays->thrashing_delay,
261 		      &current->delays->thrashing_count,
262 		      &current->delays->thrashing_delay_max,
263 		      &current->delays->thrashing_delay_min);
264 }
265 
266 void __delayacct_swapin_start(void)
267 {
268 	current->delays->swapin_start = local_clock();
269 }
270 
271 void __delayacct_swapin_end(void)
272 {
273 	delayacct_end(&current->delays->lock,
274 		      &current->delays->swapin_start,
275 		      &current->delays->swapin_delay,
276 		      &current->delays->swapin_count,
277 		      &current->delays->swapin_delay_max,
278 		      &current->delays->swapin_delay_min);
279 }
280 
281 void __delayacct_compact_start(void)
282 {
283 	current->delays->compact_start = local_clock();
284 }
285 
286 void __delayacct_compact_end(void)
287 {
288 	delayacct_end(&current->delays->lock,
289 		      &current->delays->compact_start,
290 		      &current->delays->compact_delay,
291 		      &current->delays->compact_count,
292 		      &current->delays->compact_delay_max,
293 		      &current->delays->compact_delay_min);
294 }
295 
296 void __delayacct_wpcopy_start(void)
297 {
298 	current->delays->wpcopy_start = local_clock();
299 }
300 
301 void __delayacct_wpcopy_end(void)
302 {
303 	delayacct_end(&current->delays->lock,
304 		      &current->delays->wpcopy_start,
305 		      &current->delays->wpcopy_delay,
306 		      &current->delays->wpcopy_count,
307 		      &current->delays->wpcopy_delay_max,
308 		      &current->delays->wpcopy_delay_min);
309 }
310 
311 void __delayacct_irq(struct task_struct *task, u32 delta)
312 {
313 	unsigned long flags;
314 
315 	raw_spin_lock_irqsave(&task->delays->lock, flags);
316 	task->delays->irq_delay += delta;
317 	task->delays->irq_count++;
318 	if (delta > task->delays->irq_delay_max)
319 		task->delays->irq_delay_max = delta;
320 	if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min))
321 		task->delays->irq_delay_min = delta;
322 	raw_spin_unlock_irqrestore(&task->delays->lock, flags);
323 }
324 
325