1 /* 2 * taskstats.c - Export per-task statistics to userland 3 * 4 * Copyright (C) Shailabh Nagar, IBM Corp. 2006 5 * (C) Balbir Singh, IBM Corp. 2006 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/taskstats_kern.h> 21 #include <linux/tsacct_kern.h> 22 #include <linux/delayacct.h> 23 #include <linux/cpumask.h> 24 #include <linux/percpu.h> 25 #include <net/genetlink.h> 26 #include <asm/atomic.h> 27 28 /* 29 * Maximum length of a cpumask that can be specified in 30 * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute 31 */ 32 #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) 33 34 static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; 35 static int family_registered; 36 struct kmem_cache *taskstats_cache; 37 38 static struct genl_family family = { 39 .id = GENL_ID_GENERATE, 40 .name = TASKSTATS_GENL_NAME, 41 .version = TASKSTATS_GENL_VERSION, 42 .maxattr = TASKSTATS_CMD_ATTR_MAX, 43 }; 44 45 static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] 46 __read_mostly = { 47 [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, 48 [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, 49 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, 50 [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; 51 52 struct listener { 53 struct list_head list; 54 pid_t pid; 55 char valid; 56 }; 57 58 struct listener_list { 59 struct rw_semaphore sem; 60 struct list_head list; 61 }; 62 static DEFINE_PER_CPU(struct listener_list, listener_array); 63 64 enum actions { 65 REGISTER, 66 DEREGISTER, 67 CPU_DONT_CARE 68 }; 69 70 static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, 71 size_t size) 72 { 73 struct sk_buff *skb; 74 void *reply; 75 76 /* 77 * If new attributes are added, please revisit this allocation 78 */ 79 skb = genlmsg_new(size, GFP_KERNEL); 80 if (!skb) 81 return -ENOMEM; 82 83 if (!info) { 84 int seq = get_cpu_var(taskstats_seqnum)++; 85 put_cpu_var(taskstats_seqnum); 86 87 reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); 88 } else 89 reply = genlmsg_put_reply(skb, info, &family, 0, cmd); 90 if (reply == NULL) { 91 nlmsg_free(skb); 92 return -EINVAL; 93 } 94 95 *skbp = skb; 96 return 0; 97 } 98 99 /* 100 * Send taskstats data in @skb to listener with nl_pid @pid 101 */ 102 static int send_reply(struct sk_buff *skb, pid_t pid) 103 { 104 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); 105 void *reply = genlmsg_data(genlhdr); 106 int rc; 107 108 rc = genlmsg_end(skb, reply); 109 if (rc < 0) { 110 nlmsg_free(skb); 111 return rc; 112 } 113 114 return genlmsg_unicast(skb, pid); 115 } 116 117 /* 118 * Send taskstats data in @skb to listeners registered for @cpu's exit data 119 */ 120 static void send_cpu_listeners(struct sk_buff *skb, 121 struct listener_list *listeners) 122 { 123 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); 124 struct listener *s, *tmp; 125 struct sk_buff *skb_next, *skb_cur = skb; 126 void *reply = genlmsg_data(genlhdr); 127 int rc, delcount = 0; 128 129 rc = genlmsg_end(skb, reply); 130 if (rc < 0) { 131 nlmsg_free(skb); 132 return; 133 } 134 135 rc = 0; 136 down_read(&listeners->sem); 137 list_for_each_entry(s, &listeners->list, list) { 138 skb_next = NULL; 139 if (!list_is_last(&s->list, &listeners->list)) { 140 skb_next = skb_clone(skb_cur, GFP_KERNEL); 141 if (!skb_next) 142 break; 143 } 144 rc = genlmsg_unicast(skb_cur, s->pid); 145 if (rc == -ECONNREFUSED) { 146 s->valid = 0; 147 delcount++; 148 } 149 skb_cur = skb_next; 150 } 151 up_read(&listeners->sem); 152 153 if (skb_cur) 154 nlmsg_free(skb_cur); 155 156 if (!delcount) 157 return; 158 159 /* Delete invalidated entries */ 160 down_write(&listeners->sem); 161 list_for_each_entry_safe(s, tmp, &listeners->list, list) { 162 if (!s->valid) { 163 list_del(&s->list); 164 kfree(s); 165 } 166 } 167 up_write(&listeners->sem); 168 } 169 170 static int fill_pid(pid_t pid, struct task_struct *tsk, 171 struct taskstats *stats) 172 { 173 int rc = 0; 174 175 if (!tsk) { 176 rcu_read_lock(); 177 tsk = find_task_by_pid(pid); 178 if (tsk) 179 get_task_struct(tsk); 180 rcu_read_unlock(); 181 if (!tsk) 182 return -ESRCH; 183 } else 184 get_task_struct(tsk); 185 186 memset(stats, 0, sizeof(*stats)); 187 /* 188 * Each accounting subsystem adds calls to its functions to 189 * fill in relevant parts of struct taskstsats as follows 190 * 191 * per-task-foo(stats, tsk); 192 */ 193 194 delayacct_add_tsk(stats, tsk); 195 196 /* fill in basic acct fields */ 197 stats->version = TASKSTATS_VERSION; 198 stats->nvcsw = tsk->nvcsw; 199 stats->nivcsw = tsk->nivcsw; 200 bacct_add_tsk(stats, tsk); 201 202 /* fill in extended acct fields */ 203 xacct_add_tsk(stats, tsk); 204 205 /* Define err: label here if needed */ 206 put_task_struct(tsk); 207 return rc; 208 209 } 210 211 static int fill_tgid(pid_t tgid, struct task_struct *first, 212 struct taskstats *stats) 213 { 214 struct task_struct *tsk; 215 unsigned long flags; 216 int rc = -ESRCH; 217 218 /* 219 * Add additional stats from live tasks except zombie thread group 220 * leaders who are already counted with the dead tasks 221 */ 222 rcu_read_lock(); 223 if (!first) 224 first = find_task_by_pid(tgid); 225 226 if (!first || !lock_task_sighand(first, &flags)) 227 goto out; 228 229 if (first->signal->stats) 230 memcpy(stats, first->signal->stats, sizeof(*stats)); 231 else 232 memset(stats, 0, sizeof(*stats)); 233 234 tsk = first; 235 do { 236 if (tsk->exit_state) 237 continue; 238 /* 239 * Accounting subsystem can call its functions here to 240 * fill in relevant parts of struct taskstsats as follows 241 * 242 * per-task-foo(stats, tsk); 243 */ 244 delayacct_add_tsk(stats, tsk); 245 246 stats->nvcsw += tsk->nvcsw; 247 stats->nivcsw += tsk->nivcsw; 248 } while_each_thread(first, tsk); 249 250 unlock_task_sighand(first, &flags); 251 rc = 0; 252 out: 253 rcu_read_unlock(); 254 255 stats->version = TASKSTATS_VERSION; 256 /* 257 * Accounting subsytems can also add calls here to modify 258 * fields of taskstats. 259 */ 260 return rc; 261 } 262 263 264 static void fill_tgid_exit(struct task_struct *tsk) 265 { 266 unsigned long flags; 267 268 spin_lock_irqsave(&tsk->sighand->siglock, flags); 269 if (!tsk->signal->stats) 270 goto ret; 271 272 /* 273 * Each accounting subsystem calls its functions here to 274 * accumalate its per-task stats for tsk, into the per-tgid structure 275 * 276 * per-task-foo(tsk->signal->stats, tsk); 277 */ 278 delayacct_add_tsk(tsk->signal->stats, tsk); 279 ret: 280 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 281 return; 282 } 283 284 static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) 285 { 286 struct listener_list *listeners; 287 struct listener *s, *tmp; 288 unsigned int cpu; 289 cpumask_t mask = *maskp; 290 291 if (!cpus_subset(mask, cpu_possible_map)) 292 return -EINVAL; 293 294 if (isadd == REGISTER) { 295 for_each_cpu_mask(cpu, mask) { 296 s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, 297 cpu_to_node(cpu)); 298 if (!s) 299 goto cleanup; 300 s->pid = pid; 301 INIT_LIST_HEAD(&s->list); 302 s->valid = 1; 303 304 listeners = &per_cpu(listener_array, cpu); 305 down_write(&listeners->sem); 306 list_add(&s->list, &listeners->list); 307 up_write(&listeners->sem); 308 } 309 return 0; 310 } 311 312 /* Deregister or cleanup */ 313 cleanup: 314 for_each_cpu_mask(cpu, mask) { 315 listeners = &per_cpu(listener_array, cpu); 316 down_write(&listeners->sem); 317 list_for_each_entry_safe(s, tmp, &listeners->list, list) { 318 if (s->pid == pid) { 319 list_del(&s->list); 320 kfree(s); 321 break; 322 } 323 } 324 up_write(&listeners->sem); 325 } 326 return 0; 327 } 328 329 static int parse(struct nlattr *na, cpumask_t *mask) 330 { 331 char *data; 332 int len; 333 int ret; 334 335 if (na == NULL) 336 return 1; 337 len = nla_len(na); 338 if (len > TASKSTATS_CPUMASK_MAXLEN) 339 return -E2BIG; 340 if (len < 1) 341 return -EINVAL; 342 data = kmalloc(len, GFP_KERNEL); 343 if (!data) 344 return -ENOMEM; 345 nla_strlcpy(data, na, len); 346 ret = cpulist_parse(data, *mask); 347 kfree(data); 348 return ret; 349 } 350 351 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) 352 { 353 struct nlattr *na, *ret; 354 int aggr; 355 356 aggr = (type == TASKSTATS_TYPE_PID) 357 ? TASKSTATS_TYPE_AGGR_PID 358 : TASKSTATS_TYPE_AGGR_TGID; 359 360 na = nla_nest_start(skb, aggr); 361 if (!na) 362 goto err; 363 if (nla_put(skb, type, sizeof(pid), &pid) < 0) 364 goto err; 365 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); 366 if (!ret) 367 goto err; 368 nla_nest_end(skb, na); 369 370 return nla_data(ret); 371 err: 372 return NULL; 373 } 374 375 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) 376 { 377 int rc = 0; 378 struct sk_buff *rep_skb; 379 struct taskstats *stats; 380 size_t size; 381 cpumask_t mask; 382 383 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); 384 if (rc < 0) 385 return rc; 386 if (rc == 0) 387 return add_del_listener(info->snd_pid, &mask, REGISTER); 388 389 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask); 390 if (rc < 0) 391 return rc; 392 if (rc == 0) 393 return add_del_listener(info->snd_pid, &mask, DEREGISTER); 394 395 /* 396 * Size includes space for nested attributes 397 */ 398 size = nla_total_size(sizeof(u32)) + 399 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 400 401 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 402 if (rc < 0) 403 return rc; 404 405 rc = -EINVAL; 406 if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { 407 u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); 408 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); 409 if (!stats) 410 goto err; 411 412 rc = fill_pid(pid, NULL, stats); 413 if (rc < 0) 414 goto err; 415 } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { 416 u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); 417 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); 418 if (!stats) 419 goto err; 420 421 rc = fill_tgid(tgid, NULL, stats); 422 if (rc < 0) 423 goto err; 424 } else 425 goto err; 426 427 return send_reply(rep_skb, info->snd_pid); 428 err: 429 nlmsg_free(rep_skb); 430 return rc; 431 } 432 433 static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) 434 { 435 struct signal_struct *sig = tsk->signal; 436 struct taskstats *stats; 437 438 if (sig->stats || thread_group_empty(tsk)) 439 goto ret; 440 441 /* No problem if kmem_cache_zalloc() fails */ 442 stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); 443 444 spin_lock_irq(&tsk->sighand->siglock); 445 if (!sig->stats) { 446 sig->stats = stats; 447 stats = NULL; 448 } 449 spin_unlock_irq(&tsk->sighand->siglock); 450 451 if (stats) 452 kmem_cache_free(taskstats_cache, stats); 453 ret: 454 return sig->stats; 455 } 456 457 /* Send pid data out on exit */ 458 void taskstats_exit(struct task_struct *tsk, int group_dead) 459 { 460 int rc; 461 struct listener_list *listeners; 462 struct taskstats *stats; 463 struct sk_buff *rep_skb; 464 size_t size; 465 int is_thread_group; 466 467 if (!family_registered) 468 return; 469 470 /* 471 * Size includes space for nested attributes 472 */ 473 size = nla_total_size(sizeof(u32)) + 474 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 475 476 is_thread_group = !!taskstats_tgid_alloc(tsk); 477 if (is_thread_group) { 478 /* PID + STATS + TGID + STATS */ 479 size = 2 * size; 480 /* fill the tsk->signal->stats structure */ 481 fill_tgid_exit(tsk); 482 } 483 484 listeners = &__raw_get_cpu_var(listener_array); 485 if (list_empty(&listeners->list)) 486 return; 487 488 rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size); 489 if (rc < 0) 490 return; 491 492 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); 493 if (!stats) 494 goto err; 495 496 rc = fill_pid(tsk->pid, tsk, stats); 497 if (rc < 0) 498 goto err; 499 500 /* 501 * Doesn't matter if tsk is the leader or the last group member leaving 502 */ 503 if (!is_thread_group || !group_dead) 504 goto send; 505 506 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); 507 if (!stats) 508 goto err; 509 510 memcpy(stats, tsk->signal->stats, sizeof(*stats)); 511 512 send: 513 send_cpu_listeners(rep_skb, listeners); 514 return; 515 err: 516 nlmsg_free(rep_skb); 517 } 518 519 static struct genl_ops taskstats_ops = { 520 .cmd = TASKSTATS_CMD_GET, 521 .doit = taskstats_user_cmd, 522 .policy = taskstats_cmd_get_policy, 523 }; 524 525 /* Needed early in initialization */ 526 void __init taskstats_init_early(void) 527 { 528 unsigned int i; 529 530 taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC); 531 for_each_possible_cpu(i) { 532 INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); 533 init_rwsem(&(per_cpu(listener_array, i).sem)); 534 } 535 } 536 537 static int __init taskstats_init(void) 538 { 539 int rc; 540 541 rc = genl_register_family(&family); 542 if (rc) 543 return rc; 544 545 rc = genl_register_ops(&family, &taskstats_ops); 546 if (rc < 0) 547 goto err; 548 549 family_registered = 1; 550 return 0; 551 err: 552 genl_unregister_family(&family); 553 return rc; 554 } 555 556 /* 557 * late initcall ensures initialization of statistics collection 558 * mechanisms precedes initialization of the taskstats interface 559 */ 560 late_initcall(taskstats_init); 561