1 #include <linux/cgroup.h> 2 #include <linux/slab.h> 3 #include <linux/percpu.h> 4 #include <linux/spinlock.h> 5 #include <linux/cpumask.h> 6 #include <linux/seq_file.h> 7 #include <linux/rcupdate.h> 8 #include <linux/kernel_stat.h> 9 #include <linux/err.h> 10 11 #include "sched.h" 12 13 /* 14 * CPU accounting code for task groups. 15 * 16 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh 17 * (balbir@in.ibm.com). 18 */ 19 20 /* Time spent by the tasks of the cpu accounting group executing in ... */ 21 enum cpuacct_stat_index { 22 CPUACCT_STAT_USER, /* ... user mode */ 23 CPUACCT_STAT_SYSTEM, /* ... kernel mode */ 24 25 CPUACCT_STAT_NSTATS, 26 }; 27 28 static const char * const cpuacct_stat_desc[] = { 29 [CPUACCT_STAT_USER] = "user", 30 [CPUACCT_STAT_SYSTEM] = "system", 31 }; 32 33 struct cpuacct_usage { 34 u64 usages[CPUACCT_STAT_NSTATS]; 35 }; 36 37 /* track cpu usage of a group of tasks and its child groups */ 38 struct cpuacct { 39 struct cgroup_subsys_state css; 40 /* cpuusage holds pointer to a u64-type object on every cpu */ 41 struct cpuacct_usage __percpu *cpuusage; 42 struct kernel_cpustat __percpu *cpustat; 43 }; 44 45 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) 46 { 47 return css ? container_of(css, struct cpuacct, css) : NULL; 48 } 49 50 /* return cpu accounting group to which this task belongs */ 51 static inline struct cpuacct *task_ca(struct task_struct *tsk) 52 { 53 return css_ca(task_css(tsk, cpuacct_cgrp_id)); 54 } 55 56 static inline struct cpuacct *parent_ca(struct cpuacct *ca) 57 { 58 return css_ca(ca->css.parent); 59 } 60 61 static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage); 62 static struct cpuacct root_cpuacct = { 63 .cpustat = &kernel_cpustat, 64 .cpuusage = &root_cpuacct_cpuusage, 65 }; 66 67 /* create a new cpu accounting group */ 68 static struct cgroup_subsys_state * 69 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) 70 { 71 struct cpuacct *ca; 72 73 if (!parent_css) 74 return &root_cpuacct.css; 75 76 ca = kzalloc(sizeof(*ca), GFP_KERNEL); 77 if (!ca) 78 goto out; 79 80 ca->cpuusage = alloc_percpu(struct cpuacct_usage); 81 if (!ca->cpuusage) 82 goto out_free_ca; 83 84 ca->cpustat = alloc_percpu(struct kernel_cpustat); 85 if (!ca->cpustat) 86 goto out_free_cpuusage; 87 88 return &ca->css; 89 90 out_free_cpuusage: 91 free_percpu(ca->cpuusage); 92 out_free_ca: 93 kfree(ca); 94 out: 95 return ERR_PTR(-ENOMEM); 96 } 97 98 /* destroy an existing cpu accounting group */ 99 static void cpuacct_css_free(struct cgroup_subsys_state *css) 100 { 101 struct cpuacct *ca = css_ca(css); 102 103 free_percpu(ca->cpustat); 104 free_percpu(ca->cpuusage); 105 kfree(ca); 106 } 107 108 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, 109 enum cpuacct_stat_index index) 110 { 111 struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); 112 u64 data; 113 114 /* 115 * We allow index == CPUACCT_STAT_NSTATS here to read 116 * the sum of suages. 117 */ 118 BUG_ON(index > CPUACCT_STAT_NSTATS); 119 120 #ifndef CONFIG_64BIT 121 /* 122 * Take rq->lock to make 64-bit read safe on 32-bit platforms. 123 */ 124 raw_spin_lock_irq(&cpu_rq(cpu)->lock); 125 #endif 126 127 if (index == CPUACCT_STAT_NSTATS) { 128 int i = 0; 129 130 data = 0; 131 for (i = 0; i < CPUACCT_STAT_NSTATS; i++) 132 data += cpuusage->usages[i]; 133 } else { 134 data = cpuusage->usages[index]; 135 } 136 137 #ifndef CONFIG_64BIT 138 raw_spin_unlock_irq(&cpu_rq(cpu)->lock); 139 #endif 140 141 return data; 142 } 143 144 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) 145 { 146 struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); 147 int i; 148 149 #ifndef CONFIG_64BIT 150 /* 151 * Take rq->lock to make 64-bit write safe on 32-bit platforms. 152 */ 153 raw_spin_lock_irq(&cpu_rq(cpu)->lock); 154 #endif 155 156 for (i = 0; i < CPUACCT_STAT_NSTATS; i++) 157 cpuusage->usages[i] = val; 158 159 #ifndef CONFIG_64BIT 160 raw_spin_unlock_irq(&cpu_rq(cpu)->lock); 161 #endif 162 } 163 164 /* return total cpu usage (in nanoseconds) of a group */ 165 static u64 __cpuusage_read(struct cgroup_subsys_state *css, 166 enum cpuacct_stat_index index) 167 { 168 struct cpuacct *ca = css_ca(css); 169 u64 totalcpuusage = 0; 170 int i; 171 172 for_each_possible_cpu(i) 173 totalcpuusage += cpuacct_cpuusage_read(ca, i, index); 174 175 return totalcpuusage; 176 } 177 178 static u64 cpuusage_user_read(struct cgroup_subsys_state *css, 179 struct cftype *cft) 180 { 181 return __cpuusage_read(css, CPUACCT_STAT_USER); 182 } 183 184 static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, 185 struct cftype *cft) 186 { 187 return __cpuusage_read(css, CPUACCT_STAT_SYSTEM); 188 } 189 190 static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) 191 { 192 return __cpuusage_read(css, CPUACCT_STAT_NSTATS); 193 } 194 195 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, 196 u64 val) 197 { 198 struct cpuacct *ca = css_ca(css); 199 int cpu; 200 201 /* 202 * Only allow '0' here to do a reset. 203 */ 204 if (val) 205 return -EINVAL; 206 207 for_each_possible_cpu(cpu) 208 cpuacct_cpuusage_write(ca, cpu, 0); 209 210 return 0; 211 } 212 213 static int __cpuacct_percpu_seq_show(struct seq_file *m, 214 enum cpuacct_stat_index index) 215 { 216 struct cpuacct *ca = css_ca(seq_css(m)); 217 u64 percpu; 218 int i; 219 220 for_each_possible_cpu(i) { 221 percpu = cpuacct_cpuusage_read(ca, i, index); 222 seq_printf(m, "%llu ", (unsigned long long) percpu); 223 } 224 seq_printf(m, "\n"); 225 return 0; 226 } 227 228 static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) 229 { 230 return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER); 231 } 232 233 static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) 234 { 235 return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM); 236 } 237 238 static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) 239 { 240 return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS); 241 } 242 243 static int cpuacct_all_seq_show(struct seq_file *m, void *V) 244 { 245 struct cpuacct *ca = css_ca(seq_css(m)); 246 int index; 247 int cpu; 248 249 seq_puts(m, "cpu"); 250 for (index = 0; index < CPUACCT_STAT_NSTATS; index++) 251 seq_printf(m, " %s", cpuacct_stat_desc[index]); 252 seq_puts(m, "\n"); 253 254 for_each_possible_cpu(cpu) { 255 struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); 256 257 seq_printf(m, "%d", cpu); 258 259 for (index = 0; index < CPUACCT_STAT_NSTATS; index++) { 260 #ifndef CONFIG_64BIT 261 /* 262 * Take rq->lock to make 64-bit read safe on 32-bit 263 * platforms. 264 */ 265 raw_spin_lock_irq(&cpu_rq(cpu)->lock); 266 #endif 267 268 seq_printf(m, " %llu", cpuusage->usages[index]); 269 270 #ifndef CONFIG_64BIT 271 raw_spin_unlock_irq(&cpu_rq(cpu)->lock); 272 #endif 273 } 274 seq_puts(m, "\n"); 275 } 276 return 0; 277 } 278 279 static int cpuacct_stats_show(struct seq_file *sf, void *v) 280 { 281 struct cpuacct *ca = css_ca(seq_css(sf)); 282 s64 val[CPUACCT_STAT_NSTATS]; 283 int cpu; 284 int stat; 285 286 memset(val, 0, sizeof(val)); 287 for_each_possible_cpu(cpu) { 288 u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; 289 290 val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; 291 val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; 292 val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; 293 val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; 294 val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; 295 } 296 297 for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { 298 seq_printf(sf, "%s %lld\n", 299 cpuacct_stat_desc[stat], 300 (long long)nsec_to_clock_t(val[stat])); 301 } 302 303 return 0; 304 } 305 306 static struct cftype files[] = { 307 { 308 .name = "usage", 309 .read_u64 = cpuusage_read, 310 .write_u64 = cpuusage_write, 311 }, 312 { 313 .name = "usage_user", 314 .read_u64 = cpuusage_user_read, 315 }, 316 { 317 .name = "usage_sys", 318 .read_u64 = cpuusage_sys_read, 319 }, 320 { 321 .name = "usage_percpu", 322 .seq_show = cpuacct_percpu_seq_show, 323 }, 324 { 325 .name = "usage_percpu_user", 326 .seq_show = cpuacct_percpu_user_seq_show, 327 }, 328 { 329 .name = "usage_percpu_sys", 330 .seq_show = cpuacct_percpu_sys_seq_show, 331 }, 332 { 333 .name = "usage_all", 334 .seq_show = cpuacct_all_seq_show, 335 }, 336 { 337 .name = "stat", 338 .seq_show = cpuacct_stats_show, 339 }, 340 { } /* terminate */ 341 }; 342 343 /* 344 * charge this task's execution time to its accounting group. 345 * 346 * called with rq->lock held. 347 */ 348 void cpuacct_charge(struct task_struct *tsk, u64 cputime) 349 { 350 struct cpuacct *ca; 351 int index = CPUACCT_STAT_SYSTEM; 352 struct pt_regs *regs = task_pt_regs(tsk); 353 354 if (regs && user_mode(regs)) 355 index = CPUACCT_STAT_USER; 356 357 rcu_read_lock(); 358 359 for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) 360 this_cpu_ptr(ca->cpuusage)->usages[index] += cputime; 361 362 rcu_read_unlock(); 363 } 364 365 /* 366 * Add user/system time to cpuacct. 367 * 368 * Note: it's the caller that updates the account of the root cgroup. 369 */ 370 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) 371 { 372 struct cpuacct *ca; 373 374 rcu_read_lock(); 375 for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca)) 376 this_cpu_ptr(ca->cpustat)->cpustat[index] += val; 377 rcu_read_unlock(); 378 } 379 380 struct cgroup_subsys cpuacct_cgrp_subsys = { 381 .css_alloc = cpuacct_css_alloc, 382 .css_free = cpuacct_css_free, 383 .legacy_cftypes = files, 384 .early_init = true, 385 }; 386