xref: /linux/kernel/delayacct.c (revision b50634c5e84a7a57c20b03e367a43f1b63b7ea23)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* delayacct.c - per-task delay accounting
3  *
4  * Copyright (C) Shailabh Nagar, IBM Corp. 2006
5  */
6 
7 #include <linux/sched.h>
8 #include <linux/sched/task.h>
9 #include <linux/sched/cputime.h>
10 #include <linux/sched/clock.h>
11 #include <linux/slab.h>
12 #include <linux/taskstats.h>
13 #include <linux/sysctl.h>
14 #include <linux/delayacct.h>
15 #include <linux/module.h>
16 
/*
 * Fold one per-task delay statistic into a taskstats record.  Expects
 * locals named "d" (struct taskstats *), "tsk" (struct task_struct *)
 * and "tmp" (s64) in the caller's scope.  Copies the max/min extremes
 * and the max timestamp verbatim, adds the per-task running delay into
 * the cumulative total — clamping to 0 if the signed sum wrapped — and
 * accumulates the event count.  Used under tsk->delays->lock in
 * delayacct_add_tsk().
 */
#define UPDATE_DELAY(type) \
do { \
	d->type##_delay_max = tsk->delays->type##_delay_max; \
	d->type##_delay_min = tsk->delays->type##_delay_min; \
	d->type##_delay_max_ts = tsk->delays->type##_delay_max_ts; \
	tmp = d->type##_delay_total + tsk->delays->type##_delay; \
	d->type##_delay_total = (tmp < d->type##_delay_total) ? 0 : tmp; \
	d->type##_count += tsk->delays->type##_count; \
} while (0)
26 
/* Fast-path gate: hooks are compiled out to a jump label when disabled */
DEFINE_STATIC_KEY_FALSE(delayacct_key);
int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
struct kmem_cache *delayacct_cache;	/* slab for struct task_delay_info */
30 
31 static void set_delayacct(bool enabled)
32 {
33 	if (enabled) {
34 		static_branch_enable(&delayacct_key);
35 		delayacct_on = 1;
36 	} else {
37 		delayacct_on = 0;
38 		static_branch_disable(&delayacct_key);
39 	}
40 }
41 
/*
 * "delayacct" boot parameter: request delay accounting at boot.  The
 * argument string is ignored; delayacct_init() later syncs the static
 * key to this flag.  Returns 1 to mark the option as handled.
 */
static int __init delayacct_setup_enable(char *str)
{
	delayacct_on = 1;
	return 1;
}
__setup("delayacct", delayacct_setup_enable);
48 
/*
 * One-time setup: create the slab cache for per-task delay structs,
 * initialize delay accounting for init_task, and bring the static key
 * in line with the boot-time delayacct_on value.
 */
void delayacct_init(void)
{
	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
	delayacct_tsk_init(&init_task);
	set_delayacct(delayacct_on);
}
55 
56 #ifdef CONFIG_PROC_SYSCTL
57 static int sysctl_delayacct(const struct ctl_table *table, int write, void *buffer,
58 		     size_t *lenp, loff_t *ppos)
59 {
60 	int state = delayacct_on;
61 	struct ctl_table t;
62 	int err;
63 
64 	if (write && !capable(CAP_SYS_ADMIN))
65 		return -EPERM;
66 
67 	t = *table;
68 	t.data = &state;
69 	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
70 	if (err < 0)
71 		return err;
72 	if (write)
73 		set_delayacct(state);
74 	return err;
75 }
76 
/* /proc/sys/kernel/task_delayacct: 0/1 toggle, handled above */
static const struct ctl_table kern_delayacct_table[] = {
	{
		.procname       = "task_delayacct",
		.data           = NULL,	/* handler uses a stack copy instead */
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler   = sysctl_delayacct,
		.extra1         = SYSCTL_ZERO,
		.extra2         = SYSCTL_ONE,
	},
};
88 
/* Register the sysctl table under "kernel" late in boot. */
static __init int kernel_delayacct_sysctls_init(void)
{
	register_sysctl_init("kernel", kern_delayacct_table);
	return 0;
}
late_initcall(kernel_delayacct_sysctls_init);
95 #endif
96 
/*
 * Allocate and initialize @tsk's delay-accounting state.  Allocation
 * failure is tolerated: tsk->delays stays NULL, and consumers such as
 * delayacct_add_tsk() check for that before touching it.
 */
void __delayacct_tsk_init(struct task_struct *tsk)
{
	tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
	if (tsk->delays)
		raw_spin_lock_init(&tsk->delays->lock);
}
103 
104 /*
105  * Finish delay accounting for a statistic using its timestamps (@start),
106  * accumulator (@total) and @count
107  */
108 static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count,
109 							 u64 *max, u64 *min, struct timespec64 *ts)
110 {
111 	s64 ns = local_clock() - *start;
112 	unsigned long flags;
113 
114 	if (ns > 0) {
115 		raw_spin_lock_irqsave(lock, flags);
116 		*total += ns;
117 		(*count)++;
118 		if (ns > *max) {
119 			*max = ns;
120 			ktime_get_real_ts64(ts);
121 		}
122 		if (*min == 0 || ns < *min)
123 			*min = ns;
124 		raw_spin_unlock_irqrestore(lock, flags);
125 	}
126 }
127 
/* Stamp the start of a block I/O wait for the current task. */
void __delayacct_blkio_start(void)
{
	current->delays->blkio_start = local_clock();
}
132 
133 /*
134  * We cannot rely on the `current` macro, as we haven't yet switched back to
135  * the process being woken.
136  */
137 void __delayacct_blkio_end(struct task_struct *p)
138 {
139 	delayacct_end(&p->delays->lock,
140 		      &p->delays->blkio_start,
141 		      &p->delays->blkio_delay,
142 		      &p->delays->blkio_count,
143 		      &p->delays->blkio_delay_max,
144 		      &p->delays->blkio_delay_min,
145 		      &p->delays->blkio_delay_max_ts);
146 }
147 
/*
 * Fill taskstats @d with @tsk's accumulated delay statistics.  CPU
 * run/wait numbers come from cputime and sched_info; the remaining
 * classes come from tsk->delays under its lock.  Cumulative totals are
 * clamped to 0 when the signed sum wraps (see the overflow comment
 * below).  Always returns 0.
 */
int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
	u64 utime, stime, stimescaled, utimescaled;
	unsigned long long t2, t3;
	unsigned long flags, t1;
	s64 tmp;

	/* Accumulate real CPU time; negative (wrapped) sums reset to 0. */
	task_cputime(tsk, &utime, &stime);
	tmp = (s64)d->cpu_run_real_total;
	tmp += utime + stime;
	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;

	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
	tmp = (s64)d->cpu_scaled_run_real_total;
	tmp += utimescaled + stimescaled;
	d->cpu_scaled_run_real_total =
		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;

	/*
	 * No locking available for sched_info (and too expensive to add one)
	 * Mitigate by taking snapshot of values
	 */
	t1 = tsk->sched_info.pcount;
	t2 = tsk->sched_info.run_delay;
	t3 = tsk->se.sum_exec_runtime;

	d->cpu_count += t1;

	/* Runqueue-wait (scheduling) delay from the sched_info snapshot. */
	d->cpu_delay_max = tsk->sched_info.max_run_delay;
	d->cpu_delay_min = tsk->sched_info.min_run_delay;
	d->cpu_delay_max_ts = tsk->sched_info.max_run_delay_ts;
	tmp = (s64)d->cpu_delay_total + t2;
	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
	tmp = (s64)d->cpu_run_virtual_total + t3;

	d->cpu_run_virtual_total =
		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;

	/* tsk->delays may be NULL if allocation failed in __delayacct_tsk_init() */
	if (!tsk->delays)
		return 0;

	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
	UPDATE_DELAY(blkio);
	UPDATE_DELAY(swapin);
	UPDATE_DELAY(freepages);
	UPDATE_DELAY(thrashing);
	UPDATE_DELAY(compact);
	UPDATE_DELAY(wpcopy);
	UPDATE_DELAY(irq);
	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);

	return 0;
}
202 
203 __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
204 {
205 	__u64 ret;
206 	unsigned long flags;
207 
208 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
209 	ret = nsec_to_clock_t(tsk->delays->blkio_delay);
210 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
211 	return ret;
212 }
213 
/* Stamp the start of a direct-reclaim (freepages) wait for current. */
void __delayacct_freepages_start(void)
{
	current->delays->freepages_start = local_clock();
}
218 
219 void __delayacct_freepages_end(void)
220 {
221 	delayacct_end(&current->delays->lock,
222 		      &current->delays->freepages_start,
223 		      &current->delays->freepages_delay,
224 		      &current->delays->freepages_count,
225 		      &current->delays->freepages_delay_max,
226 		      &current->delays->freepages_delay_min,
227 		      &current->delays->freepages_delay_max_ts);
228 }
229 
230 void __delayacct_thrashing_start(bool *in_thrashing)
231 {
232 	*in_thrashing = !!current->in_thrashing;
233 	if (*in_thrashing)
234 		return;
235 
236 	current->in_thrashing = 1;
237 	current->delays->thrashing_start = local_clock();
238 }
239 
240 void __delayacct_thrashing_end(bool *in_thrashing)
241 {
242 	if (*in_thrashing)
243 		return;
244 
245 	current->in_thrashing = 0;
246 	delayacct_end(&current->delays->lock,
247 		      &current->delays->thrashing_start,
248 		      &current->delays->thrashing_delay,
249 		      &current->delays->thrashing_count,
250 		      &current->delays->thrashing_delay_max,
251 		      &current->delays->thrashing_delay_min,
252 		      &current->delays->thrashing_delay_max_ts);
253 }
254 
/* Stamp the start of a swap-in wait for the current task. */
void __delayacct_swapin_start(void)
{
	current->delays->swapin_start = local_clock();
}
259 
260 void __delayacct_swapin_end(void)
261 {
262 	delayacct_end(&current->delays->lock,
263 		      &current->delays->swapin_start,
264 		      &current->delays->swapin_delay,
265 		      &current->delays->swapin_count,
266 		      &current->delays->swapin_delay_max,
267 		      &current->delays->swapin_delay_min,
268 		      &current->delays->swapin_delay_max_ts);
269 }
270 
/* Stamp the start of a memory-compaction wait for the current task. */
void __delayacct_compact_start(void)
{
	current->delays->compact_start = local_clock();
}
275 
276 void __delayacct_compact_end(void)
277 {
278 	delayacct_end(&current->delays->lock,
279 		      &current->delays->compact_start,
280 		      &current->delays->compact_delay,
281 		      &current->delays->compact_count,
282 		      &current->delays->compact_delay_max,
283 		      &current->delays->compact_delay_min,
284 		      &current->delays->compact_delay_max_ts);
285 }
286 
/* Stamp the start of a write-protect copy wait for the current task. */
void __delayacct_wpcopy_start(void)
{
	current->delays->wpcopy_start = local_clock();
}
291 
292 void __delayacct_wpcopy_end(void)
293 {
294 	delayacct_end(&current->delays->lock,
295 		      &current->delays->wpcopy_start,
296 		      &current->delays->wpcopy_delay,
297 		      &current->delays->wpcopy_count,
298 		      &current->delays->wpcopy_delay_max,
299 		      &current->delays->wpcopy_delay_min,
300 		      &current->delays->wpcopy_delay_max_ts);
301 }
302 
303 void __delayacct_irq(struct task_struct *task, u32 delta)
304 {
305 	unsigned long flags;
306 
307 	raw_spin_lock_irqsave(&task->delays->lock, flags);
308 	task->delays->irq_delay += delta;
309 	task->delays->irq_count++;
310 	if (delta > task->delays->irq_delay_max) {
311 		task->delays->irq_delay_max = delta;
312 		ktime_get_real_ts64(&task->delays->irq_delay_max_ts);
313 	}
314 	if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min))
315 		task->delays->irq_delay_min = delta;
316 	raw_spin_unlock_irqrestore(&task->delays->lock, flags);
317 }
318 
319