1 /* 2 * taskstats.c - Export per-task statistics to userland 3 * 4 * Copyright (C) Shailabh Nagar, IBM Corp. 2006 5 * (C) Balbir Singh, IBM Corp. 2006 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/taskstats_kern.h> 21 #include <linux/tsacct_kern.h> 22 #include <linux/delayacct.h> 23 #include <linux/tsacct_kern.h> 24 #include <linux/cpumask.h> 25 #include <linux/percpu.h> 26 #include <net/genetlink.h> 27 #include <asm/atomic.h> 28 29 /* 30 * Maximum length of a cpumask that can be specified in 31 * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute 32 */ 33 #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) 34 35 static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; 36 static int family_registered; 37 struct kmem_cache *taskstats_cache; 38 39 static struct genl_family family = { 40 .id = GENL_ID_GENERATE, 41 .name = TASKSTATS_GENL_NAME, 42 .version = TASKSTATS_GENL_VERSION, 43 .maxattr = TASKSTATS_CMD_ATTR_MAX, 44 }; 45 46 static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] 47 __read_mostly = { 48 [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, 49 [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, 50 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, 51 [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; 52 53 struct listener { 54 struct list_head list; 55 pid_t pid; 56 char valid; 57 }; 58 59 struct listener_list { 60 struct rw_semaphore sem; 61 struct list_head list; 62 }; 63 
/*
 * Per-CPU list of listeners interested in the exit data of tasks that
 * run on that CPU; entries are added/removed by add_del_listener().
 */
static DEFINE_PER_CPU(struct listener_list, listener_array);

/* What add_del_listener() should do for the given pid/cpumask */
enum actions {
	REGISTER,
	DEREGISTER,
	CPU_DONT_CARE
};

/*
 * Allocate an skb of @size and start a genetlink message of type @cmd in it.
 * @info == NULL means a kernel-initiated (exit-time) message, numbered from
 * the per-cpu taskstats_seqnum counter; otherwise the message is built as a
 * reply to the request in @info.  On success the skb is returned via @skbp;
 * on failure the skb is freed here.
 */
static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
				size_t size)
{
	struct sk_buff *skb;
	void *reply;

	/*
	 * If new attributes are added, please revisit this allocation
	 */
	skb = genlmsg_new(size, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	if (!info) {
		/* Kernel-generated message: use our own sequence number */
		int seq = get_cpu_var(taskstats_seqnum)++;
		put_cpu_var(taskstats_seqnum);

		reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
	} else
		reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
	if (reply == NULL) {
		nlmsg_free(skb);
		return -EINVAL;
	}

	*skbp = skb;
	return 0;
}

/*
 * Send taskstats data in @skb to listener with nl_pid @pid
 */
static int send_reply(struct sk_buff *skb, pid_t pid)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	void *reply = genlmsg_data(genlhdr);
	int rc;

	/* Finalize the netlink message before sending */
	rc = genlmsg_end(skb, reply);
	if (rc < 0) {
		nlmsg_free(skb);
		return rc;
	}

	return genlmsg_unicast(skb, pid);
}

/*
 * Send taskstats data in @skb to listeners registered for @cpu's exit data.
 * Consumes @skb.  Listeners whose socket has gone away (-ECONNREFUSED) are
 * only marked invalid under the read lock; they are unlinked and freed
 * afterwards under the write lock.
 */
static void send_cpu_listeners(struct sk_buff *skb,
				struct listener_list *listeners)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	struct listener *s, *tmp;
	struct sk_buff *skb_next, *skb_cur = skb;
	void *reply = genlmsg_data(genlhdr);
	int rc, delcount = 0;

	rc = genlmsg_end(skb, reply);
	if (rc < 0) {
		nlmsg_free(skb);
		return;
	}

	rc = 0;
	down_read(&listeners->sem);
	list_for_each_entry(s, &listeners->list, list) {
		skb_next = NULL;
		/* Clone for every listener but the last, which gets skb_cur */
		if (!list_is_last(&s->list, &listeners->list)) {
			skb_next = skb_clone(skb_cur, GFP_KERNEL);
			if (!skb_next)
				break;
		}
		rc = genlmsg_unicast(skb_cur, s->pid);
		if (rc == -ECONNREFUSED) {
			/* Can't list_del() while holding only the read lock */
			s->valid = 0;
			delcount++;
		}
		skb_cur = skb_next;
	}
	up_read(&listeners->sem);

	/* Unsent skb left over when skb_clone() failed above */
	if (skb_cur)
		nlmsg_free(skb_cur);

	if (!delcount)
		return;

	/* Delete invalidated entries */
	down_write(&listeners->sem);
	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
		if (!s->valid) {
			list_del(&s->list);
			kfree(s);
		}
	}
	up_write(&listeners->sem);
}

/*
 * Fill @stats for the task identified by @pid, or for @tsk directly when
 * @tsk is non-NULL (exit path).  Takes and drops a task reference.
 * Returns 0, or -ESRCH if no task with @pid exists.
 */
static int fill_pid(pid_t pid, struct task_struct *tsk,
		struct taskstats *stats)
{
	int rc = 0;

	if (!tsk) {
		rcu_read_lock();
		tsk = find_task_by_pid(pid);
		if (tsk)
			get_task_struct(tsk);
		rcu_read_unlock();
		if (!tsk)
			return -ESRCH;
	} else
		get_task_struct(tsk);

	memset(stats, 0, sizeof(*stats));
	/*
	 * Each accounting subsystem adds calls to its functions to
	 * fill in relevant parts of struct taskstats as follows
	 *
	 *	per-task-foo(stats, tsk);
	 */

	delayacct_add_tsk(stats, tsk);

	/* fill in basic acct fields */
	stats->version = TASKSTATS_VERSION;
	stats->nvcsw = tsk->nvcsw;
	stats->nivcsw = tsk->nivcsw;
	bacct_add_tsk(stats, tsk);

	/* fill in extended acct fields */
	xacct_add_tsk(stats, tsk);

	/* Define err: label here if needed */
	put_task_struct(tsk);
	return rc;

}

/*
 * Fill @stats with aggregate data for the thread group identified by @tgid
 * (or whose member is @first, if non-NULL).  Starts from the group's
 * accumulated dead-task stats in signal->stats, then adds live threads.
 * Returns 0, or -ESRCH if the group is gone.
 */
static int fill_tgid(pid_t tgid, struct task_struct *first,
		struct taskstats *stats)
{
	struct task_struct *tsk;
	unsigned long flags;
	int rc = -ESRCH;

	/*
	 * Add additional stats from live tasks except zombie thread group
	 * leaders who are already counted with the dead tasks
	 */
	rcu_read_lock();
	if (!first)
		first = find_task_by_pid(tgid);

	if (!first || !lock_task_sighand(first, &flags))
		goto out;

	/* Seed with stats already accumulated from exited group members */
	if (first->signal->stats)
		memcpy(stats, first->signal->stats, sizeof(*stats));
	else
		memset(stats, 0, sizeof(*stats));

	tsk = first;
	do {
		if (tsk->exit_state)
			continue;
		/*
		 * Accounting subsystem can call its functions here to
		 * fill in relevant parts of struct taskstats as follows
		 *
		 *	per-task-foo(stats, tsk);
		 */
		delayacct_add_tsk(stats, tsk);

		stats->nvcsw += tsk->nvcsw;
		stats->nivcsw += tsk->nivcsw;
	} while_each_thread(first, tsk);

	unlock_task_sighand(first, &flags);
	rc = 0;
out:
	rcu_read_unlock();

	stats->version = TASKSTATS_VERSION;
	/*
	 * Accounting subsystems can also add calls here to modify
	 * fields of taskstats.
	 */
	return rc;
}


/*
 * Accumulate the exiting task's per-task stats into the thread group's
 * tsk->signal->stats, if that structure was allocated.  Caller must hold
 * a valid sighand (exit path).
 */
static void fill_tgid_exit(struct task_struct *tsk)
{
	unsigned long flags;

	spin_lock_irqsave(&tsk->sighand->siglock, flags);
	if (!tsk->signal->stats)
		goto ret;

	/*
	 * Each accounting subsystem calls its functions here to
	 * accumulate its per-task stats for tsk, into the per-tgid structure
	 *
	 *	per-task-foo(tsk->signal->stats, tsk);
	 */
	delayacct_add_tsk(tsk->signal->stats, tsk);
ret:
	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
	return;
}

/*
 * Register (@isadd == REGISTER) or deregister the listener with netlink
 * pid @pid on every CPU in @maskp, which must be a subset of
 * cpu_possible_map.  On an allocation failure during registration the
 * entries already added for @pid are removed again (fall through to
 * the cleanup loop).
 */
static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
{
	struct listener_list *listeners;
	struct listener *s, *tmp;
	unsigned int cpu;
	cpumask_t mask = *maskp;

	if (!cpus_subset(mask, cpu_possible_map))
		return -EINVAL;

	if (isadd == REGISTER) {
		for_each_cpu_mask(cpu, mask) {
			/* Allocate on the target CPU's node */
			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
					cpu_to_node(cpu));
			if (!s)
				goto cleanup;
			s->pid = pid;
			INIT_LIST_HEAD(&s->list);
			s->valid = 1;

			listeners = &per_cpu(listener_array, cpu);
			down_write(&listeners->sem);
			list_add(&s->list, &listeners->list);
			up_write(&listeners->sem);
		}
		return 0;
	}

	/* Deregister or cleanup */
cleanup:
	for_each_cpu_mask(cpu, mask) {
		listeners = &per_cpu(listener_array, cpu);
		down_write(&listeners->sem);
		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
			if (s->pid == pid) {
				list_del(&s->list);
				kfree(s);
				break;
			}
		}
		up_write(&listeners->sem);
	}
	return 0;
}

/*
 * Parse a cpulist string from attribute @na into @mask.
 * Returns 1 when the attribute is absent, 0 on success, or a negative
 * errno for oversized, empty or malformed input.
 */
static int parse(struct nlattr *na, cpumask_t *mask)
{
	char *data;
	int len;
	int ret;

	if (na == NULL)
		return 1;
	len = nla_len(na);
	if (len > TASKSTATS_CPUMASK_MAXLEN)
		return -E2BIG;
	if (len < 1)
		return -EINVAL;
	data = kmalloc(len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	nla_strlcpy(data, na, len);
	ret = cpulist_parse(data, *mask);
	kfree(data);
	return ret;
}

/*
 * Append a nested AGGR_PID/AGGR_TGID attribute to @skb containing @pid
 * (as attribute @type) plus reserved space for a struct taskstats.
 * Returns a pointer to the reserved stats area for the caller to fill,
 * or NULL if the skb has no room.
 */
static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
{
	struct nlattr *na, *ret;
	int aggr;

	aggr = (type == TASKSTATS_TYPE_PID)
			? TASKSTATS_TYPE_AGGR_PID
			: TASKSTATS_TYPE_AGGR_TGID;

	na = nla_nest_start(skb, aggr);
	if (!na)
		goto err;
	if (nla_put(skb, type, sizeof(pid), &pid) < 0)
		goto err;
	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
	if (!ret)
		goto err;
	nla_nest_end(skb, na);

	return nla_data(ret);
err:
	return NULL;
}

/*
 * TASKSTATS_CMD_GET handler: either (de)register a cpumask listener, or
 * reply to the sender with the stats of the requested pid or tgid.
 */
static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
	int rc = 0;
	struct sk_buff *rep_skb;
	struct taskstats *stats;
	size_t size;
	cpumask_t mask;

	/* parse() returns 0 only when the cpumask attribute was present */
	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
	if (rc < 0)
		return rc;
	if (rc == 0)
		return add_del_listener(info->snd_pid, &mask, REGISTER);

	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
	if (rc < 0)
		return rc;
	if (rc == 0)
		return add_del_listener(info->snd_pid, &mask, DEREGISTER);

	/*
	 * Size includes space for nested attributes
	 */
	size = nla_total_size(sizeof(u32)) +
		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);

	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return rc;

	rc = -EINVAL;
	if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
		stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
		if (!stats)
			goto err;

		rc = fill_pid(pid, NULL, stats);
		if (rc < 0)
			goto err;
	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
		stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
		if (!stats)
			goto err;

		rc = fill_tgid(tgid, NULL, stats);
		if (rc < 0)
			goto err;
	} else
		/* Neither a PID nor a TGID attribute was supplied */
		goto err;

	return send_reply(rep_skb, info->snd_pid);
err:
	nlmsg_free(rep_skb);
	return rc;
}

/*
 * Allocate tsk->signal->stats on demand for a multithreaded group.
 * A race with another thread allocating concurrently is resolved under
 * siglock: the loser frees its own allocation.  Returns sig->stats,
 * which may still be NULL if the allocation failed.
 */
static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
{
	struct signal_struct *sig = tsk->signal;
	struct taskstats *stats;

	if (sig->stats || thread_group_empty(tsk))
		goto ret;

	/* No problem if kmem_cache_zalloc() fails */
	stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);

	spin_lock_irq(&tsk->sighand->siglock);
	if (!sig->stats) {
		sig->stats = stats;
		stats = NULL;
	}
	spin_unlock_irq(&tsk->sighand->siglock);

	/* Another thread won the race and installed its allocation */
	if (stats)
		kmem_cache_free(taskstats_cache, stats);
ret:
	return sig->stats;
}

/* Send pid data out on exit */
void taskstats_exit(struct task_struct *tsk, int group_dead)
{
	int rc;
	struct listener_list *listeners;
	struct taskstats *stats;
	struct sk_buff *rep_skb;
	size_t size;
	int is_thread_group;

	if (!family_registered)
		return;

	/*
	 * Size includes space for nested attributes
	 */
	size = nla_total_size(sizeof(u32)) +
		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);

	is_thread_group = !!taskstats_tgid_alloc(tsk);
	if (is_thread_group) {
		/* PID + STATS + TGID + STATS */
		size = 2 * size;
		/* fill the tsk->signal->stats structure */
		fill_tgid_exit(tsk);
	}

	/* Nothing to do if no listener registered for this CPU */
	listeners = &__raw_get_cpu_var(listener_array);
	if (list_empty(&listeners->list))
		return;

	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return;

	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid);
	if (!stats)
		goto err;

	rc = fill_pid(tsk->pid, tsk, stats);
	if (rc < 0)
		goto err;

	/*
	 * Doesn't matter if tsk is the leader or the last group member leaving
	 */
	if (!is_thread_group || !group_dead)
		goto send;

	/* Last member of a dying group: append the aggregated group stats */
	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid);
	if (!stats)
		goto err;

	memcpy(stats, tsk->signal->stats, sizeof(*stats));

send:
	send_cpu_listeners(rep_skb, listeners);
	return;
err:
	nlmsg_free(rep_skb);
}

/* Operations table for the taskstats genetlink family */
static struct genl_ops taskstats_ops = {
	.cmd		= TASKSTATS_CMD_GET,
	.doit		= taskstats_user_cmd,
	.policy		= taskstats_cmd_get_policy,
};

/* Needed early in initialization */
void __init taskstats_init_early(void)
{
	unsigned int i;

	taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
	for_each_possible_cpu(i) {
		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
		init_rwsem(&(per_cpu(listener_array, i).sem));
	}
}

/*
 * Register the genetlink family and its single GET operation.
 * family_registered gates taskstats_exit(), so it is set only after
 * both registrations succeed.
 */
static int __init taskstats_init(void)
{
	int rc;

	rc = genl_register_family(&family);
	if (rc)
		return rc;

	rc = genl_register_ops(&family, &taskstats_ops);
	if (rc < 0)
		goto err;

	family_registered = 1;
	return 0;
err:
	genl_unregister_family(&family);
	return rc;
}

/*
 * late initcall ensures initialization of statistics collection
 * mechanisms precedes initialization of the taskstats interface
 */
late_initcall(taskstats_init);