1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* delayacct.c - per-task delay accounting 3 * 4 * Copyright (C) Shailabh Nagar, IBM Corp. 2006 5 */ 6 7 #include <linux/sched.h> 8 #include <linux/sched/task.h> 9 #include <linux/sched/cputime.h> 10 #include <linux/sched/clock.h> 11 #include <linux/slab.h> 12 #include <linux/taskstats.h> 13 #include <linux/sysctl.h> 14 #include <linux/delayacct.h> 15 #include <linux/module.h> 16 17 #define UPDATE_DELAY(type) \ 18 do { \ 19 d->type##_delay_max = tsk->delays->type##_delay_max; \ 20 d->type##_delay_min = tsk->delays->type##_delay_min; \ 21 d->type##_delay_max_ts = tsk->delays->type##_delay_max_ts; \ 22 tmp = d->type##_delay_total + tsk->delays->type##_delay; \ 23 d->type##_delay_total = (tmp < d->type##_delay_total) ? 0 : tmp; \ 24 d->type##_count += tsk->delays->type##_count; \ 25 } while (0) 26 27 DEFINE_STATIC_KEY_FALSE(delayacct_key); 28 int delayacct_on __read_mostly; /* Delay accounting turned on/off */ 29 struct kmem_cache *delayacct_cache; 30 31 static void set_delayacct(bool enabled) 32 { 33 if (enabled) { 34 static_branch_enable(&delayacct_key); 35 delayacct_on = 1; 36 } else { 37 delayacct_on = 0; 38 static_branch_disable(&delayacct_key); 39 } 40 } 41 42 static int __init delayacct_setup_enable(char *str) 43 { 44 delayacct_on = 1; 45 return 1; 46 } 47 __setup("delayacct", delayacct_setup_enable); 48 49 void delayacct_init(void) 50 { 51 delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT); 52 delayacct_tsk_init(&init_task); 53 set_delayacct(delayacct_on); 54 } 55 56 #ifdef CONFIG_PROC_SYSCTL 57 static int sysctl_delayacct(const struct ctl_table *table, int write, void *buffer, 58 size_t *lenp, loff_t *ppos) 59 { 60 int state = delayacct_on; 61 struct ctl_table t; 62 int err; 63 64 if (write && !capable(CAP_SYS_ADMIN)) 65 return -EPERM; 66 67 t = *table; 68 t.data = &state; 69 err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); 70 if (err < 0) 71 return err; 72 if (write) 73 set_delayacct(state); 74 return err; 75 } 76 77 static const struct ctl_table kern_delayacct_table[] = { 78 { 79 .procname = "task_delayacct", 80 .data = NULL, 81 .maxlen = sizeof(unsigned int), 82 .mode = 0644, 83 .proc_handler = sysctl_delayacct, 84 .extra1 = SYSCTL_ZERO, 85 .extra2 = SYSCTL_ONE, 86 }, 87 }; 88 89 static __init int kernel_delayacct_sysctls_init(void) 90 { 91 register_sysctl_init("kernel", kern_delayacct_table); 92 return 0; 93 } 94 late_initcall(kernel_delayacct_sysctls_init); 95 #endif 96 97 void __delayacct_tsk_init(struct task_struct *tsk) 98 { 99 tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL); 100 if (tsk->delays) 101 raw_spin_lock_init(&tsk->delays->lock); 102 } 103 104 /* 105 * Finish delay accounting for a statistic using its timestamps (@start), 106 * accumulator (@total) and @count 107 */ 108 static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, 109 u64 *max, u64 *min, struct timespec64 *ts) 110 { 111 s64 ns = local_clock() - *start; 112 unsigned long flags; 113 114 if (ns > 0) { 115 raw_spin_lock_irqsave(lock, flags); 116 *total += ns; 117 (*count)++; 118 if (ns > *max) { 119 *max = ns; 120 ktime_get_real_ts64(ts); 121 } 122 if (*min == 0 || ns < *min) 123 *min = ns; 124 raw_spin_unlock_irqrestore(lock, flags); 125 } 126 } 127 128 void __delayacct_blkio_start(void) 129 { 130 current->delays->blkio_start = local_clock(); 131 } 132 133 /* 134 * We cannot rely on the `current` macro, as we haven't yet switched back to 135 * the process being woken. 136 */ 137 void __delayacct_blkio_end(struct task_struct *p) 138 { 139 delayacct_end(&p->delays->lock, 140 &p->delays->blkio_start, 141 &p->delays->blkio_delay, 142 &p->delays->blkio_count, 143 &p->delays->blkio_delay_max, 144 &p->delays->blkio_delay_min, 145 &p->delays->blkio_delay_max_ts); 146 } 147 148 int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) 149 { 150 u64 utime, stime, stimescaled, utimescaled; 151 unsigned long long t2, t3; 152 unsigned long flags, t1; 153 s64 tmp; 154 155 task_cputime(tsk, &utime, &stime); 156 tmp = (s64)d->cpu_run_real_total; 157 tmp += utime + stime; 158 d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; 159 160 task_cputime_scaled(tsk, &utimescaled, &stimescaled); 161 tmp = (s64)d->cpu_scaled_run_real_total; 162 tmp += utimescaled + stimescaled; 163 d->cpu_scaled_run_real_total = 164 (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp; 165 166 /* 167 * No locking available for sched_info (and too expensive to add one) 168 * Mitigate by taking snapshot of values 169 */ 170 t1 = tsk->sched_info.pcount; 171 t2 = tsk->sched_info.run_delay; 172 t3 = tsk->se.sum_exec_runtime; 173 174 d->cpu_count += t1; 175 176 d->cpu_delay_max = tsk->sched_info.max_run_delay; 177 d->cpu_delay_min = tsk->sched_info.min_run_delay; 178 d->cpu_delay_max_ts = tsk->sched_info.max_run_delay_ts; 179 tmp = (s64)d->cpu_delay_total + t2; 180 d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; 181 tmp = (s64)d->cpu_run_virtual_total + t3; 182 183 d->cpu_run_virtual_total = 184 (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; 185 186 if (!tsk->delays) 187 return 0; 188 189 /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ 190 raw_spin_lock_irqsave(&tsk->delays->lock, flags); 191 UPDATE_DELAY(blkio); 192 UPDATE_DELAY(swapin); 193 UPDATE_DELAY(freepages); 194 UPDATE_DELAY(thrashing); 195 UPDATE_DELAY(compact); 196 UPDATE_DELAY(wpcopy); 197 UPDATE_DELAY(irq); 198 raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); 199 200 return 0; 201 } 202 203 __u64 __delayacct_blkio_ticks(struct task_struct *tsk) 204 { 205 __u64 ret; 206 unsigned long flags; 207 208 raw_spin_lock_irqsave(&tsk->delays->lock, flags); 209 ret = nsec_to_clock_t(tsk->delays->blkio_delay); 210 raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); 211 return ret; 212 } 213 214 void __delayacct_freepages_start(void) 215 { 216 current->delays->freepages_start = local_clock(); 217 } 218 219 void __delayacct_freepages_end(void) 220 { 221 delayacct_end(¤t->delays->lock, 222 ¤t->delays->freepages_start, 223 ¤t->delays->freepages_delay, 224 ¤t->delays->freepages_count, 225 ¤t->delays->freepages_delay_max, 226 ¤t->delays->freepages_delay_min, 227 ¤t->delays->freepages_delay_max_ts); 228 } 229 230 void __delayacct_thrashing_start(bool *in_thrashing) 231 { 232 *in_thrashing = !!current->in_thrashing; 233 if (*in_thrashing) 234 return; 235 236 current->in_thrashing = 1; 237 current->delays->thrashing_start = local_clock(); 238 } 239 240 void __delayacct_thrashing_end(bool *in_thrashing) 241 { 242 if (*in_thrashing) 243 return; 244 245 current->in_thrashing = 0; 246 delayacct_end(¤t->delays->lock, 247 ¤t->delays->thrashing_start, 248 ¤t->delays->thrashing_delay, 249 ¤t->delays->thrashing_count, 250 ¤t->delays->thrashing_delay_max, 251 ¤t->delays->thrashing_delay_min, 252 ¤t->delays->thrashing_delay_max_ts); 253 } 254 255 void __delayacct_swapin_start(void) 256 { 257 current->delays->swapin_start = local_clock(); 258 } 259 260 void __delayacct_swapin_end(void) 261 { 262 delayacct_end(¤t->delays->lock, 263 ¤t->delays->swapin_start, 264 ¤t->delays->swapin_delay, 265 ¤t->delays->swapin_count, 266 ¤t->delays->swapin_delay_max, 267 ¤t->delays->swapin_delay_min, 268 ¤t->delays->swapin_delay_max_ts); 269 } 270 271 void __delayacct_compact_start(void) 272 { 273 current->delays->compact_start = local_clock(); 274 } 275 276 void __delayacct_compact_end(void) 277 { 278 delayacct_end(¤t->delays->lock, 279 ¤t->delays->compact_start, 280 ¤t->delays->compact_delay, 281 ¤t->delays->compact_count, 282 ¤t->delays->compact_delay_max, 283 ¤t->delays->compact_delay_min, 284 ¤t->delays->compact_delay_max_ts); 285 } 286 287 void __delayacct_wpcopy_start(void) 288 { 289 current->delays->wpcopy_start = local_clock(); 290 } 291 292 void __delayacct_wpcopy_end(void) 293 { 294 delayacct_end(¤t->delays->lock, 295 ¤t->delays->wpcopy_start, 296 ¤t->delays->wpcopy_delay, 297 ¤t->delays->wpcopy_count, 298 ¤t->delays->wpcopy_delay_max, 299 ¤t->delays->wpcopy_delay_min, 300 ¤t->delays->wpcopy_delay_max_ts); 301 } 302 303 void __delayacct_irq(struct task_struct *task, u32 delta) 304 { 305 unsigned long flags; 306 307 raw_spin_lock_irqsave(&task->delays->lock, flags); 308 task->delays->irq_delay += delta; 309 task->delays->irq_count++; 310 if (delta > task->delays->irq_delay_max) { 311 task->delays->irq_delay_max = delta; 312 ktime_get_real_ts64(&task->delays->irq_delay_max_ts); 313 } 314 if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min)) 315 task->delays->irq_delay_min = delta; 316 raw_spin_unlock_irqrestore(&task->delays->lock, flags); 317 } 318 319