xref: /linux/kernel/delayacct.c (revision 7d4e49a77d9930c69751b9192448fda6ff9100f1)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* delayacct.c - per-task delay accounting
3  *
4  * Copyright (C) Shailabh Nagar, IBM Corp. 2006
5  */
6 
7 #include <linux/sched.h>
8 #include <linux/sched/task.h>
9 #include <linux/sched/cputime.h>
10 #include <linux/sched/clock.h>
11 #include <linux/slab.h>
12 #include <linux/taskstats.h>
13 #include <linux/sysctl.h>
14 #include <linux/delayacct.h>
15 #include <linux/module.h>
16 
/*
 * Fold the per-task "type" delay statistics from tsk->delays into the
 * taskstats record d.  The expansion site must provide d, tsk and a
 * scratch variable tmp.
 *
 * The task's delay is added to the running total; if the sum compares
 * below the previous total the total is reset to 0 (treated as overflow).
 * The count is accumulated, while max and min are copied from the task.
 */
#define UPDATE_DELAY(type) \
do { \
	tmp = d->type##_delay_total + tsk->delays->type##_delay; \
	if (tmp < d->type##_delay_total) \
		d->type##_delay_total = 0; \
	else \
		d->type##_delay_total = tmp; \
	d->type##_count += tsk->delays->type##_count; \
	d->type##_delay_max = tsk->delays->type##_delay_max; \
	d->type##_delay_min = tsk->delays->type##_delay_min; \
} while (0)
25 
26 DEFINE_STATIC_KEY_FALSE(delayacct_key);
27 int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
28 struct kmem_cache *delayacct_cache;
29 
/*
 * Switch delay accounting on or off, keeping the delayacct_on flag and the
 * delayacct_key static branch in step with each other.
 */
static void set_delayacct(bool enabled)
{
	if (!enabled) {
		/* Disabling: clear the flag first, then turn the branch off */
		delayacct_on = 0;
		static_branch_disable(&delayacct_key);
		return;
	}

	/* Enabling: turn the branch on first, then set the flag */
	static_branch_enable(&delayacct_key);
	delayacct_on = 1;
}
40 
delayacct_setup_enable(char * str)41 static int __init delayacct_setup_enable(char *str)
42 {
43 	delayacct_on = 1;
44 	return 1;
45 }
46 __setup("delayacct", delayacct_setup_enable);
47 
delayacct_init(void)48 void delayacct_init(void)
49 {
50 	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
51 	delayacct_tsk_init(&init_task);
52 	set_delayacct(delayacct_on);
53 }
54 
55 #ifdef CONFIG_PROC_SYSCTL
/*
 * proc handler for the "task_delayacct" sysctl.
 *
 * Operates on a stack copy of @table whose data pointer refers to a local
 * snapshot of delayacct_on, so proc_dointvec_minmax() never touches the
 * global directly.  After a successful write the new value is applied
 * through set_delayacct().
 *
 * Returns -EPERM for a write without CAP_SYS_ADMIN, otherwise the result
 * of proc_dointvec_minmax().
 */
static int sysctl_delayacct(const struct ctl_table *table, int write, void *buffer,
		     size_t *lenp, loff_t *ppos)
{
	struct ctl_table local_table = *table;
	int value = delayacct_on;
	int ret;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	local_table.data = &value;
	ret = proc_dointvec_minmax(&local_table, write, buffer, lenp, ppos);
	if (ret >= 0 && write)
		set_delayacct(value);

	return ret;
}
75 
76 static const struct ctl_table kern_delayacct_table[] = {
77 	{
78 		.procname       = "task_delayacct",
79 		.data           = NULL,
80 		.maxlen         = sizeof(unsigned int),
81 		.mode           = 0644,
82 		.proc_handler   = sysctl_delayacct,
83 		.extra1         = SYSCTL_ZERO,
84 		.extra2         = SYSCTL_ONE,
85 	},
86 };
87 
kernel_delayacct_sysctls_init(void)88 static __init int kernel_delayacct_sysctls_init(void)
89 {
90 	register_sysctl_init("kernel", kern_delayacct_table);
91 	return 0;
92 }
93 late_initcall(kernel_delayacct_sysctls_init);
94 #endif
95 
__delayacct_tsk_init(struct task_struct * tsk)96 void __delayacct_tsk_init(struct task_struct *tsk)
97 {
98 	tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
99 	if (tsk->delays)
100 		raw_spin_lock_init(&tsk->delays->lock);
101 }
102 
103 /*
104  * Finish delay accounting for a statistic using its timestamps (@start),
105  * accumulator (@total) and @count
106  */
delayacct_end(raw_spinlock_t * lock,u64 * start,u64 * total,u32 * count,u64 * max,u64 * min)107 static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min)
108 {
109 	s64 ns = local_clock() - *start;
110 	unsigned long flags;
111 
112 	if (ns > 0) {
113 		raw_spin_lock_irqsave(lock, flags);
114 		*total += ns;
115 		(*count)++;
116 		if (ns > *max)
117 			*max = ns;
118 		if (*min == 0 || ns < *min)
119 			*min = ns;
120 		raw_spin_unlock_irqrestore(lock, flags);
121 	}
122 }
123 
__delayacct_blkio_start(void)124 void __delayacct_blkio_start(void)
125 {
126 	current->delays->blkio_start = local_clock();
127 }
128 
129 /*
130  * We cannot rely on the `current` macro, as we haven't yet switched back to
131  * the process being woken.
132  */
__delayacct_blkio_end(struct task_struct * p)133 void __delayacct_blkio_end(struct task_struct *p)
134 {
135 	delayacct_end(&p->delays->lock,
136 		      &p->delays->blkio_start,
137 		      &p->delays->blkio_delay,
138 		      &p->delays->blkio_count,
139 		      &p->delays->blkio_delay_max,
140 		      &p->delays->blkio_delay_min);
141 }
142 
delayacct_add_tsk(struct taskstats * d,struct task_struct * tsk)143 int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
144 {
145 	u64 utime, stime, stimescaled, utimescaled;
146 	unsigned long long t2, t3;
147 	unsigned long flags, t1;
148 	s64 tmp;
149 
150 	task_cputime(tsk, &utime, &stime);
151 	tmp = (s64)d->cpu_run_real_total;
152 	tmp += utime + stime;
153 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
154 
155 	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
156 	tmp = (s64)d->cpu_scaled_run_real_total;
157 	tmp += utimescaled + stimescaled;
158 	d->cpu_scaled_run_real_total =
159 		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
160 
161 	/*
162 	 * No locking available for sched_info (and too expensive to add one)
163 	 * Mitigate by taking snapshot of values
164 	 */
165 	t1 = tsk->sched_info.pcount;
166 	t2 = tsk->sched_info.run_delay;
167 	t3 = tsk->se.sum_exec_runtime;
168 
169 	d->cpu_count += t1;
170 
171 	d->cpu_delay_max = tsk->sched_info.max_run_delay;
172 	d->cpu_delay_min = tsk->sched_info.min_run_delay;
173 	tmp = (s64)d->cpu_delay_total + t2;
174 	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
175 	tmp = (s64)d->cpu_run_virtual_total + t3;
176 
177 	d->cpu_run_virtual_total =
178 		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
179 
180 	if (!tsk->delays)
181 		return 0;
182 
183 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
184 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
185 	UPDATE_DELAY(blkio);
186 	UPDATE_DELAY(swapin);
187 	UPDATE_DELAY(freepages);
188 	UPDATE_DELAY(thrashing);
189 	UPDATE_DELAY(compact);
190 	UPDATE_DELAY(wpcopy);
191 	UPDATE_DELAY(irq);
192 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
193 
194 	return 0;
195 }
196 
__delayacct_blkio_ticks(struct task_struct * tsk)197 __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
198 {
199 	__u64 ret;
200 	unsigned long flags;
201 
202 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
203 	ret = nsec_to_clock_t(tsk->delays->blkio_delay);
204 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
205 	return ret;
206 }
207 
__delayacct_freepages_start(void)208 void __delayacct_freepages_start(void)
209 {
210 	current->delays->freepages_start = local_clock();
211 }
212 
__delayacct_freepages_end(void)213 void __delayacct_freepages_end(void)
214 {
215 	delayacct_end(&current->delays->lock,
216 		      &current->delays->freepages_start,
217 		      &current->delays->freepages_delay,
218 		      &current->delays->freepages_count,
219 		      &current->delays->freepages_delay_max,
220 		      &current->delays->freepages_delay_min);
221 }
222 
/*
 * Begin a thrashing delay interval for the current task.
 *
 * @in_thrashing receives whether the task was already marked as thrashing.
 * If it was, nothing else happens; otherwise the task is marked and the
 * start timestamp is taken.
 */
void __delayacct_thrashing_start(bool *in_thrashing)
{
	*in_thrashing = !!current->in_thrashing;

	if (!*in_thrashing) {
		current->in_thrashing = 1;
		current->delays->thrashing_start = local_clock();
	}
}
232 
/*
 * End a thrashing delay interval for the current task.
 *
 * Does nothing when *@in_thrashing is set.  Otherwise the task's
 * in_thrashing mark is cleared and the interval is accounted.
 */
void __delayacct_thrashing_end(bool *in_thrashing)
{
	struct task_delay_info *info;

	if (*in_thrashing)
		return;

	current->in_thrashing = 0;

	info = current->delays;
	delayacct_end(&info->lock, &info->thrashing_start,
		      &info->thrashing_delay, &info->thrashing_count,
		      &info->thrashing_delay_max, &info->thrashing_delay_min);
}
246 
__delayacct_swapin_start(void)247 void __delayacct_swapin_start(void)
248 {
249 	current->delays->swapin_start = local_clock();
250 }
251 
__delayacct_swapin_end(void)252 void __delayacct_swapin_end(void)
253 {
254 	delayacct_end(&current->delays->lock,
255 		      &current->delays->swapin_start,
256 		      &current->delays->swapin_delay,
257 		      &current->delays->swapin_count,
258 		      &current->delays->swapin_delay_max,
259 		      &current->delays->swapin_delay_min);
260 }
261 
__delayacct_compact_start(void)262 void __delayacct_compact_start(void)
263 {
264 	current->delays->compact_start = local_clock();
265 }
266 
__delayacct_compact_end(void)267 void __delayacct_compact_end(void)
268 {
269 	delayacct_end(&current->delays->lock,
270 		      &current->delays->compact_start,
271 		      &current->delays->compact_delay,
272 		      &current->delays->compact_count,
273 		      &current->delays->compact_delay_max,
274 		      &current->delays->compact_delay_min);
275 }
276 
__delayacct_wpcopy_start(void)277 void __delayacct_wpcopy_start(void)
278 {
279 	current->delays->wpcopy_start = local_clock();
280 }
281 
__delayacct_wpcopy_end(void)282 void __delayacct_wpcopy_end(void)
283 {
284 	delayacct_end(&current->delays->lock,
285 		      &current->delays->wpcopy_start,
286 		      &current->delays->wpcopy_delay,
287 		      &current->delays->wpcopy_count,
288 		      &current->delays->wpcopy_delay_max,
289 		      &current->delays->wpcopy_delay_min);
290 }
291 
/*
 * Account an IRQ delay of @delta against @task.
 *
 * With the task's delays lock held, @delta is added to the IRQ delay total
 * and the IRQ count is incremented.  The maximum is raised when @delta
 * exceeds it.  The minimum is only updated for a non-zero @delta, and a
 * stored minimum of 0 is treated as "not set yet".
 */
void __delayacct_irq(struct task_struct *task, u32 delta)
{
	struct task_delay_info *info = task->delays;
	unsigned long flags;

	raw_spin_lock_irqsave(&info->lock, flags);

	info->irq_delay += delta;
	info->irq_count++;

	if (info->irq_delay_max < delta)
		info->irq_delay_max = delta;
	if (delta && (info->irq_delay_min == 0 || delta < info->irq_delay_min))
		info->irq_delay_min = delta;

	raw_spin_unlock_irqrestore(&info->lock, flags);
}
305 
306