// SPDX-License-Identifier: GPL-2.0
/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */
#include <asm/irq_regs.h>
#include "sched.h"

/* Time spent by the tasks of the CPU accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	/*
	 * Not a real stat: doubles as the array size and as a sentinel
	 * index meaning "total usage" in cpuacct_cpuusage_read().
	 */
	CPUACCT_STAT_NSTATS,
};

/* Names emitted by cpuacct_stats_show()/cpuacct_all_seq_show(). */
static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

/* track CPU usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state	css;
	/* cpuusage holds pointer to a u64-type object on every CPU */
	u64 __percpu	*cpuusage;
	struct kernel_cpustat __percpu	*cpustat;
};

/* Map a css back to its containing cpuacct; NULL-safe for a NULL css. */
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuacct, css) : NULL;
}

/* Return CPU accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
	return css_ca(task_css(tsk, cpuacct_cgrp_id));
}

/* Parent group in the cgroup hierarchy; NULL for the root group. */
static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
	return css_ca(ca->css.parent);
}

/*
 * The root group is statically allocated (the subsystem is early_init,
 * see cpuacct_cgrp_subsys below) and shares the global kernel_cpustat
 * rather than owning private percpu cpustat storage.
 */
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = {
	.cpustat	= &kernel_cpustat,
	.cpuusage	= &root_cpuacct_cpuusage,
};

/* Create a new CPU accounting group */
static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct cpuacct *ca;

	/* !parent_css means the root group: return the static instance. */
	if (!parent_css)
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

	ca->cpuusage = alloc_percpu(u64);
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

/* Destroy an existing CPU accounting group */
static void cpuacct_css_free(struct cgroup_subsys_state *css)
{
	struct cpuacct *ca = css_ca(css);

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

/*
 * Read one accounting value of @ca for @cpu.
 *
 * @index selects user time, system time, or - with the
 * CPUACCT_STAT_NSTATS sentinel - the total usage counter.
 */
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
				 enum cpuacct_stat_index index)
{
	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
	u64 data;

	/*
	 * We allow index == CPUACCT_STAT_NSTATS here to read
	 * the sum of usages.
	 */
	if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS))
		return 0;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
	 */
	raw_spin_rq_lock_irq(cpu_rq(cpu));
#endif

	switch (index) {
	case CPUACCT_STAT_USER:
		/* "user" includes nice time, matching /proc/stat grouping. */
		data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
		break;
	case CPUACCT_STAT_SYSTEM:
		/* "system" folds in hard- and soft-irq time. */
		data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
			cpustat[CPUTIME_SOFTIRQ];
		break;
	case CPUACCT_STAT_NSTATS:
		/* Sentinel: return the raw total usage counter. */
		data = *cpuusage;
		break;
	}

#ifndef CONFIG_64BIT
	raw_spin_rq_unlock_irq(cpu_rq(cpu));
#endif

	return data;
}

/*
 * Reset all counters of @ca for @cpu to zero (used by a "0" write to
 * cpuacct.usage).  The root group is never reset, since its cpustat is
 * the global kernel_cpustat.
 */
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
{
	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

	/* Don't allow to reset global kernel_cpustat */
	if (ca == &root_cpuacct)
		return;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_rq_lock_irq(cpu_rq(cpu));
#endif
	/* Clear exactly the fields that cpuacct_cpuusage_read() reports. */
	*cpuusage = 0;
	cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
	cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
	cpustat[CPUTIME_SOFTIRQ] = 0;

#ifndef CONFIG_64BIT
	raw_spin_rq_unlock_irq(cpu_rq(cpu));
#endif
}

/* Return total CPU usage (in nanoseconds) of a group */
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
			   enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(css);
	u64 totalcpuusage = 0;
	int i;

	/* Sum over possible (not just online) CPUs so no time is lost. */
	for_each_possible_cpu(i)
		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);

	return totalcpuusage;
}

/* cpuacct.usage_user: total user-mode time across all CPUs. */
static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_USER);
}

/* cpuacct.usage_sys: total kernel-mode time across all CPUs. */
static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
}

/* cpuacct.usage: total usage across all CPUs (NSTATS sentinel). */
static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
}

/*
 * Write handler for cpuacct.usage: writing 0 resets all counters of the
 * group on every CPU; any other value is rejected.
 */
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	struct cpuacct *ca = css_ca(css);
	int cpu;

	/*
	 * Only allow '0' here to do a reset.
	 */
	if (val)
		return -EINVAL;

	for_each_possible_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu);

	return 0;
}

/* Print one space-separated per-CPU value line for the given stat index. */
static int __cpuacct_percpu_seq_show(struct seq_file *m,
				     enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	u64 percpu;
	int i;

	for_each_possible_cpu(i) {
		percpu = cpuacct_cpuusage_read(ca, i, index);
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}

/* cpuacct.usage_percpu_user seq_file handler. */
static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
}

/* cpuacct.usage_percpu_sys seq_file handler. */
static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
}

/* cpuacct.usage_percpu seq_file handler (total usage per CPU). */
static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
}

/*
 * cpuacct.usage_all: a header line naming each stat, then one row per
 * possible CPU with its user and system values.
 */
static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		seq_printf(m, "%d", cpu);
		for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
			seq_printf(m, " %llu",
				   cpuacct_cpuusage_read(ca, cpu, index));
		seq_puts(m, "\n");
	}
	return 0;
}

/*
 * cpuacct.stat: aggregate user/system time over all CPUs, run it through
 * cputime_adjust() against the cgroup's prev_cputime for monotonicity,
 * and print the result in clock ticks.
 */
static int cpuacct_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	struct task_cputime cputime;
	u64 val[CPUACCT_STAT_NSTATS];
	int cpu;
	int stat;

	memset(&cputime, 0, sizeof(cputime));
	for_each_possible_cpu(cpu) {
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

		/* Same user/system grouping as cpuacct_cpuusage_read(). */
		cputime.utime += cpustat[CPUTIME_USER];
		cputime.utime += cpustat[CPUTIME_NICE];
		cputime.stime += cpustat[CPUTIME_SYSTEM];
		cputime.stime += cpustat[CPUTIME_IRQ];
		cputime.stime += cpustat[CPUTIME_SOFTIRQ];

		cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu);
	}

	cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime,
		       &val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]);

	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat],
			   nsec_to_clock_t(val[stat]));
	}

	return 0;
}

/* cgroup-v1 control files exposed by this controller. */
static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
	{
		.name = "usage_percpu",
		.seq_show = cpuacct_percpu_seq_show,
	},
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
	},
	{ }	/* terminate */
};

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;

	/* RCU protects the css hierarchy walk up to (and including) root. */
	rcu_read_lock();

	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
		__this_cpu_add(*ca->cpuusage, cputime);

	rcu_read_unlock();
}

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
	struct cpuacct *ca;

	rcu_read_lock();
	/*
	 * Stop before root_cpuacct: its cpustat is the global
	 * kernel_cpustat, which the caller updates itself (see above).
	 */
	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
		__this_cpu_add(ca->cpustat->cpustat[index], val);
	rcu_read_unlock();
}

struct cgroup_subsys cpuacct_cgrp_subsys = {
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	.legacy_cftypes	= files,
	/* Root group must exist before the scheduler starts charging. */
	.early_init	= true,
};