xref: /linux/kernel/delayacct.c (revision 90079798f1d748e97c74e23736491543577b8aee)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* delayacct.c - per-task delay accounting
3  *
4  * Copyright (C) Shailabh Nagar, IBM Corp. 2006
5  */
6 
7 #include <linux/sched.h>
8 #include <linux/sched/task.h>
9 #include <linux/sched/cputime.h>
10 #include <linux/sched/clock.h>
11 #include <linux/slab.h>
12 #include <linux/taskstats.h>
13 #include <linux/sysctl.h>
14 #include <linux/delayacct.h>
15 #include <linux/module.h>
16 
/*
 * UPDATE_DELAY(type) - fold one delay category of a task into a taskstats.
 *
 * Expands in a scope that provides @d (the destination statistics), @tsk
 * (the source task, with a non-NULL ->delays) and @tmp (scratch variable).
 *
 * <type>_delay is added to <type>_delay_total; if the sum compares lower
 * than the previous total, 0 is stored instead. <type>_count is added to the
 * destination count. The min/max values and the timestamp of the max are
 * copied from the task, replacing whatever the destination held.
 */
#define UPDATE_DELAY(type) \
do { \
	tmp = d->type##_delay_total + tsk->delays->type##_delay; \
	d->type##_delay_total = (tmp < d->type##_delay_total) ? 0 : tmp; \
	d->type##_count += tsk->delays->type##_count; \
	d->type##_delay_min = tsk->delays->type##_delay_min; \
	d->type##_delay_max = tsk->delays->type##_delay_max; \
	d->type##_delay_max_ts.tv_sec = tsk->delays->type##_delay_max_ts.tv_sec; \
	d->type##_delay_max_ts.tv_nsec = tsk->delays->type##_delay_max_ts.tv_nsec; \
} while (0)
27 
/*
 * Static branch key for delay accounting; defined in the "false" state and
 * flipped by set_delayacct().
 */
DEFINE_STATIC_KEY_FALSE(delayacct_key);
/*
 * Integer on/off flag. Set to 1 by the "delayacct" boot parameter and kept
 * in step with delayacct_key by set_delayacct().
 */
int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
/* Slab cache for struct task_delay_info, created in delayacct_init(). */
struct kmem_cache *delayacct_cache;
31 
32 static void set_delayacct(bool enabled)
33 {
34 	if (enabled) {
35 		static_branch_enable(&delayacct_key);
36 		delayacct_on = 1;
37 	} else {
38 		delayacct_on = 0;
39 		static_branch_disable(&delayacct_key);
40 	}
41 }
42 
43 static int __init delayacct_setup_enable(char *str)
44 {
45 	delayacct_on = 1;
46 	return 1;
47 }
48 __setup("delayacct", delayacct_setup_enable);
49 
50 void delayacct_init(void)
51 {
52 	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
53 	delayacct_tsk_init(&init_task);
54 	set_delayacct(delayacct_on);
55 }
56 
57 #ifdef CONFIG_PROC_SYSCTL
58 static int sysctl_delayacct(const struct ctl_table *table, int write, void *buffer,
59 		     size_t *lenp, loff_t *ppos)
60 {
61 	int state = delayacct_on;
62 	struct ctl_table t;
63 	int err;
64 
65 	if (write && !capable(CAP_SYS_ADMIN))
66 		return -EPERM;
67 
68 	t = *table;
69 	t.data = &state;
70 	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
71 	if (err < 0)
72 		return err;
73 	if (write)
74 		set_delayacct(state);
75 	return err;
76 }
77 
78 static const struct ctl_table kern_delayacct_table[] = {
79 	{
80 		.procname       = "task_delayacct",
81 		.data           = NULL,
82 		.maxlen         = sizeof(unsigned int),
83 		.mode           = 0644,
84 		.proc_handler   = sysctl_delayacct,
85 		.extra1         = SYSCTL_ZERO,
86 		.extra2         = SYSCTL_ONE,
87 	},
88 };
89 
90 static __init int kernel_delayacct_sysctls_init(void)
91 {
92 	register_sysctl_init("kernel", kern_delayacct_table);
93 	return 0;
94 }
95 late_initcall(kernel_delayacct_sysctls_init);
96 #endif
97 
98 void __delayacct_tsk_init(struct task_struct *tsk)
99 {
100 	tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
101 	if (tsk->delays)
102 		raw_spin_lock_init(&tsk->delays->lock);
103 }
104 
105 /*
106  * Finish delay accounting for a statistic using its timestamps (@start),
107  * accumulator (@total) and @count
108  */
109 static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count,
110 							 u64 *max, u64 *min, struct timespec64 *ts)
111 {
112 	s64 ns = local_clock() - *start;
113 	unsigned long flags;
114 
115 	if (ns > 0) {
116 		raw_spin_lock_irqsave(lock, flags);
117 		*total += ns;
118 		(*count)++;
119 		if (ns > *max) {
120 			*max = ns;
121 			ktime_get_real_ts64(ts);
122 		}
123 		if (*min == 0 || ns < *min)
124 			*min = ns;
125 		raw_spin_unlock_irqrestore(lock, flags);
126 	}
127 }
128 
129 void __delayacct_blkio_start(void)
130 {
131 	current->delays->blkio_start = local_clock();
132 }
133 
134 /*
135  * We cannot rely on the `current` macro, as we haven't yet switched back to
136  * the process being woken.
137  */
138 void __delayacct_blkio_end(struct task_struct *p)
139 {
140 	delayacct_end(&p->delays->lock,
141 		      &p->delays->blkio_start,
142 		      &p->delays->blkio_delay,
143 		      &p->delays->blkio_count,
144 		      &p->delays->blkio_delay_max,
145 		      &p->delays->blkio_delay_min,
146 		      &p->delays->blkio_delay_max_ts);
147 }
148 
149 int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
150 {
151 	u64 utime, stime, stimescaled, utimescaled;
152 	unsigned long long t2, t3;
153 	unsigned long flags, t1;
154 	s64 tmp;
155 
156 	task_cputime(tsk, &utime, &stime);
157 	tmp = (s64)d->cpu_run_real_total;
158 	tmp += utime + stime;
159 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
160 
161 	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
162 	tmp = (s64)d->cpu_scaled_run_real_total;
163 	tmp += utimescaled + stimescaled;
164 	d->cpu_scaled_run_real_total =
165 		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
166 
167 	/*
168 	 * No locking available for sched_info (and too expensive to add one)
169 	 * Mitigate by taking snapshot of values
170 	 */
171 	t1 = tsk->sched_info.pcount;
172 	t2 = tsk->sched_info.run_delay;
173 	t3 = tsk->se.sum_exec_runtime;
174 
175 	d->cpu_count += t1;
176 
177 	d->cpu_delay_max = tsk->sched_info.max_run_delay;
178 	d->cpu_delay_min = tsk->sched_info.min_run_delay;
179 	d->cpu_delay_max_ts.tv_sec = tsk->sched_info.max_run_delay_ts.tv_sec;
180 	d->cpu_delay_max_ts.tv_nsec = tsk->sched_info.max_run_delay_ts.tv_nsec;
181 	tmp = (s64)d->cpu_delay_total + t2;
182 	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
183 	tmp = (s64)d->cpu_run_virtual_total + t3;
184 
185 	d->cpu_run_virtual_total =
186 		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
187 
188 	if (!tsk->delays)
189 		return 0;
190 
191 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
192 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
193 	UPDATE_DELAY(blkio);
194 	UPDATE_DELAY(swapin);
195 	UPDATE_DELAY(freepages);
196 	UPDATE_DELAY(thrashing);
197 	UPDATE_DELAY(compact);
198 	UPDATE_DELAY(wpcopy);
199 	UPDATE_DELAY(irq);
200 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
201 
202 	return 0;
203 }
204 
205 __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
206 {
207 	__u64 ret;
208 	unsigned long flags;
209 
210 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
211 	ret = nsec_to_clock_t(tsk->delays->blkio_delay);
212 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
213 	return ret;
214 }
215 
216 void __delayacct_freepages_start(void)
217 {
218 	current->delays->freepages_start = local_clock();
219 }
220 
221 void __delayacct_freepages_end(void)
222 {
223 	delayacct_end(&current->delays->lock,
224 		      &current->delays->freepages_start,
225 		      &current->delays->freepages_delay,
226 		      &current->delays->freepages_count,
227 		      &current->delays->freepages_delay_max,
228 		      &current->delays->freepages_delay_min,
229 		      &current->delays->freepages_delay_max_ts);
230 }
231 
232 void __delayacct_thrashing_start(bool *in_thrashing)
233 {
234 	*in_thrashing = !!current->in_thrashing;
235 	if (*in_thrashing)
236 		return;
237 
238 	current->in_thrashing = 1;
239 	current->delays->thrashing_start = local_clock();
240 }
241 
242 void __delayacct_thrashing_end(bool *in_thrashing)
243 {
244 	if (*in_thrashing)
245 		return;
246 
247 	current->in_thrashing = 0;
248 	delayacct_end(&current->delays->lock,
249 		      &current->delays->thrashing_start,
250 		      &current->delays->thrashing_delay,
251 		      &current->delays->thrashing_count,
252 		      &current->delays->thrashing_delay_max,
253 		      &current->delays->thrashing_delay_min,
254 		      &current->delays->thrashing_delay_max_ts);
255 }
256 
257 void __delayacct_swapin_start(void)
258 {
259 	current->delays->swapin_start = local_clock();
260 }
261 
262 void __delayacct_swapin_end(void)
263 {
264 	delayacct_end(&current->delays->lock,
265 		      &current->delays->swapin_start,
266 		      &current->delays->swapin_delay,
267 		      &current->delays->swapin_count,
268 		      &current->delays->swapin_delay_max,
269 		      &current->delays->swapin_delay_min,
270 		      &current->delays->swapin_delay_max_ts);
271 }
272 
273 void __delayacct_compact_start(void)
274 {
275 	current->delays->compact_start = local_clock();
276 }
277 
278 void __delayacct_compact_end(void)
279 {
280 	delayacct_end(&current->delays->lock,
281 		      &current->delays->compact_start,
282 		      &current->delays->compact_delay,
283 		      &current->delays->compact_count,
284 		      &current->delays->compact_delay_max,
285 		      &current->delays->compact_delay_min,
286 		      &current->delays->compact_delay_max_ts);
287 }
288 
289 void __delayacct_wpcopy_start(void)
290 {
291 	current->delays->wpcopy_start = local_clock();
292 }
293 
294 void __delayacct_wpcopy_end(void)
295 {
296 	delayacct_end(&current->delays->lock,
297 		      &current->delays->wpcopy_start,
298 		      &current->delays->wpcopy_delay,
299 		      &current->delays->wpcopy_count,
300 		      &current->delays->wpcopy_delay_max,
301 		      &current->delays->wpcopy_delay_min,
302 		      &current->delays->wpcopy_delay_max_ts);
303 }
304 
305 void __delayacct_irq(struct task_struct *task, u32 delta)
306 {
307 	unsigned long flags;
308 
309 	raw_spin_lock_irqsave(&task->delays->lock, flags);
310 	task->delays->irq_delay += delta;
311 	task->delays->irq_count++;
312 	if (delta > task->delays->irq_delay_max) {
313 		task->delays->irq_delay_max = delta;
314 		ktime_get_real_ts64(&task->delays->irq_delay_max_ts);
315 	}
316 	if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min))
317 		task->delays->irq_delay_min = delta;
318 	raw_spin_unlock_irqrestore(&task->delays->lock, flags);
319 }
320 
321