/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own. We don't use the
 *  rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
48 */ 49 50 #include <asm/uaccess.h> 51 52 #include <linux/errno.h> 53 #include <linux/time.h> 54 #include <linux/proc_fs.h> 55 #include <linux/stat.h> 56 #include <linux/init.h> 57 #include <linux/capability.h> 58 #include <linux/file.h> 59 #include <linux/string.h> 60 #include <linux/seq_file.h> 61 #include <linux/namei.h> 62 #include <linux/namespace.h> 63 #include <linux/mm.h> 64 #include <linux/smp_lock.h> 65 #include <linux/rcupdate.h> 66 #include <linux/kallsyms.h> 67 #include <linux/mount.h> 68 #include <linux/security.h> 69 #include <linux/ptrace.h> 70 #include <linux/seccomp.h> 71 #include <linux/cpuset.h> 72 #include <linux/audit.h> 73 #include <linux/poll.h> 74 #include "internal.h" 75 76 /* NOTE: 77 * Implementing inode permission operations in /proc is almost 78 * certainly an error. Permission checks need to happen during 79 * each system call not at open time. The reason is that most of 80 * what we wish to check for permissions in /proc varies at runtime. 81 * 82 * The classic example of a problem is opening file descriptors 83 * in /proc for a task before it execs a suid executable. 84 */ 85 86 /* 87 * For hysterical raisins we keep the same inumbers as in the old procfs. 88 * Feel free to change the macro below - just keep the range distinct from 89 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). 90 * As soon as we'll get a separate superblock we will be able to forget 91 * about magical ranges too. 
92 */ 93 94 #define fake_ino(pid,ino) (((pid)<<16)|(ino)) 95 96 enum pid_directory_inos { 97 PROC_TGID_INO = 2, 98 PROC_TGID_TASK, 99 PROC_TGID_STATUS, 100 PROC_TGID_MEM, 101 #ifdef CONFIG_SECCOMP 102 PROC_TGID_SECCOMP, 103 #endif 104 PROC_TGID_CWD, 105 PROC_TGID_ROOT, 106 PROC_TGID_EXE, 107 PROC_TGID_FD, 108 PROC_TGID_ENVIRON, 109 PROC_TGID_AUXV, 110 PROC_TGID_CMDLINE, 111 PROC_TGID_STAT, 112 PROC_TGID_STATM, 113 PROC_TGID_MAPS, 114 PROC_TGID_NUMA_MAPS, 115 PROC_TGID_MOUNTS, 116 PROC_TGID_MOUNTSTATS, 117 PROC_TGID_WCHAN, 118 #ifdef CONFIG_MMU 119 PROC_TGID_SMAPS, 120 #endif 121 #ifdef CONFIG_SCHEDSTATS 122 PROC_TGID_SCHEDSTAT, 123 #endif 124 #ifdef CONFIG_CPUSETS 125 PROC_TGID_CPUSET, 126 #endif 127 #ifdef CONFIG_SECURITY 128 PROC_TGID_ATTR, 129 PROC_TGID_ATTR_CURRENT, 130 PROC_TGID_ATTR_PREV, 131 PROC_TGID_ATTR_EXEC, 132 PROC_TGID_ATTR_FSCREATE, 133 PROC_TGID_ATTR_KEYCREATE, 134 PROC_TGID_ATTR_SOCKCREATE, 135 #endif 136 #ifdef CONFIG_AUDITSYSCALL 137 PROC_TGID_LOGINUID, 138 #endif 139 PROC_TGID_OOM_SCORE, 140 PROC_TGID_OOM_ADJUST, 141 PROC_TID_INO, 142 PROC_TID_STATUS, 143 PROC_TID_MEM, 144 #ifdef CONFIG_SECCOMP 145 PROC_TID_SECCOMP, 146 #endif 147 PROC_TID_CWD, 148 PROC_TID_ROOT, 149 PROC_TID_EXE, 150 PROC_TID_FD, 151 PROC_TID_ENVIRON, 152 PROC_TID_AUXV, 153 PROC_TID_CMDLINE, 154 PROC_TID_STAT, 155 PROC_TID_STATM, 156 PROC_TID_MAPS, 157 PROC_TID_NUMA_MAPS, 158 PROC_TID_MOUNTS, 159 PROC_TID_MOUNTSTATS, 160 PROC_TID_WCHAN, 161 #ifdef CONFIG_MMU 162 PROC_TID_SMAPS, 163 #endif 164 #ifdef CONFIG_SCHEDSTATS 165 PROC_TID_SCHEDSTAT, 166 #endif 167 #ifdef CONFIG_CPUSETS 168 PROC_TID_CPUSET, 169 #endif 170 #ifdef CONFIG_SECURITY 171 PROC_TID_ATTR, 172 PROC_TID_ATTR_CURRENT, 173 PROC_TID_ATTR_PREV, 174 PROC_TID_ATTR_EXEC, 175 PROC_TID_ATTR_FSCREATE, 176 PROC_TID_ATTR_KEYCREATE, 177 PROC_TID_ATTR_SOCKCREATE, 178 #endif 179 #ifdef CONFIG_AUDITSYSCALL 180 PROC_TID_LOGINUID, 181 #endif 182 PROC_TID_OOM_SCORE, 183 PROC_TID_OOM_ADJUST, 184 185 /* Add new entries before this */ 
186 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 187 }; 188 189 /* Worst case buffer size needed for holding an integer. */ 190 #define PROC_NUMBUF 10 191 192 struct pid_entry { 193 int type; 194 int len; 195 char *name; 196 mode_t mode; 197 }; 198 199 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} 200 201 static struct pid_entry tgid_base_stuff[] = { 202 E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), 203 E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 204 E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), 205 E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), 206 E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), 207 E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 208 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 209 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 210 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 211 #ifdef CONFIG_NUMA 212 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 213 #endif 214 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 215 #ifdef CONFIG_SECCOMP 216 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 217 #endif 218 E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 219 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 220 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 221 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 222 E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR), 223 #ifdef CONFIG_MMU 224 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), 225 #endif 226 #ifdef CONFIG_SECURITY 227 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 228 #endif 229 #ifdef CONFIG_KALLSYMS 230 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), 231 #endif 232 #ifdef CONFIG_SCHEDSTATS 233 E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), 234 #endif 235 #ifdef CONFIG_CPUSETS 236 E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 237 #endif 238 E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 239 E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 240 #ifdef CONFIG_AUDITSYSCALL 241 E(PROC_TGID_LOGINUID, "loginuid", 
S_IFREG|S_IWUSR|S_IRUGO), 242 #endif 243 {0,0,NULL,0} 244 }; 245 static struct pid_entry tid_base_stuff[] = { 246 E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 247 E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), 248 E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), 249 E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), 250 E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 251 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 252 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), 253 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 254 #ifdef CONFIG_NUMA 255 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 256 #endif 257 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 258 #ifdef CONFIG_SECCOMP 259 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 260 #endif 261 E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 262 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), 263 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), 264 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 265 #ifdef CONFIG_MMU 266 E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), 267 #endif 268 #ifdef CONFIG_SECURITY 269 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 270 #endif 271 #ifdef CONFIG_KALLSYMS 272 E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), 273 #endif 274 #ifdef CONFIG_SCHEDSTATS 275 E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), 276 #endif 277 #ifdef CONFIG_CPUSETS 278 E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 279 #endif 280 E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 281 E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 282 #ifdef CONFIG_AUDITSYSCALL 283 E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 284 #endif 285 {0,0,NULL,0} 286 }; 287 288 #ifdef CONFIG_SECURITY 289 static struct pid_entry tgid_attr_stuff[] = { 290 E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 291 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 292 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 293 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 294 
E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), 295 E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), 296 {0,0,NULL,0} 297 }; 298 static struct pid_entry tid_attr_stuff[] = { 299 E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 300 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 301 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 302 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 303 E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), 304 E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), 305 {0,0,NULL,0} 306 }; 307 #endif 308 309 #undef E 310 311 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 312 { 313 struct task_struct *task = get_proc_task(inode); 314 struct files_struct *files = NULL; 315 struct file *file; 316 int fd = proc_fd(inode); 317 318 if (task) { 319 files = get_files_struct(task); 320 put_task_struct(task); 321 } 322 if (files) { 323 /* 324 * We are not taking a ref to the file structure, so we must 325 * hold ->file_lock. 
326 */ 327 spin_lock(&files->file_lock); 328 file = fcheck_files(files, fd); 329 if (file) { 330 *mnt = mntget(file->f_vfsmnt); 331 *dentry = dget(file->f_dentry); 332 spin_unlock(&files->file_lock); 333 put_files_struct(files); 334 return 0; 335 } 336 spin_unlock(&files->file_lock); 337 put_files_struct(files); 338 } 339 return -ENOENT; 340 } 341 342 static struct fs_struct *get_fs_struct(struct task_struct *task) 343 { 344 struct fs_struct *fs; 345 task_lock(task); 346 fs = task->fs; 347 if(fs) 348 atomic_inc(&fs->count); 349 task_unlock(task); 350 return fs; 351 } 352 353 static int get_nr_threads(struct task_struct *tsk) 354 { 355 /* Must be called with the rcu_read_lock held */ 356 unsigned long flags; 357 int count = 0; 358 359 if (lock_task_sighand(tsk, &flags)) { 360 count = atomic_read(&tsk->signal->count); 361 unlock_task_sighand(tsk, &flags); 362 } 363 return count; 364 } 365 366 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 367 { 368 struct task_struct *task = get_proc_task(inode); 369 struct fs_struct *fs = NULL; 370 int result = -ENOENT; 371 372 if (task) { 373 fs = get_fs_struct(task); 374 put_task_struct(task); 375 } 376 if (fs) { 377 read_lock(&fs->lock); 378 *mnt = mntget(fs->pwdmnt); 379 *dentry = dget(fs->pwd); 380 read_unlock(&fs->lock); 381 result = 0; 382 put_fs_struct(fs); 383 } 384 return result; 385 } 386 387 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 388 { 389 struct task_struct *task = get_proc_task(inode); 390 struct fs_struct *fs = NULL; 391 int result = -ENOENT; 392 393 if (task) { 394 fs = get_fs_struct(task); 395 put_task_struct(task); 396 } 397 if (fs) { 398 read_lock(&fs->lock); 399 *mnt = mntget(fs->rootmnt); 400 *dentry = dget(fs->root); 401 read_unlock(&fs->lock); 402 result = 0; 403 put_fs_struct(fs); 404 } 405 return result; 406 } 407 408 #define MAY_PTRACE(task) \ 409 (task == current || \ 410 (task->parent == current && \ 411 
(task->ptrace & PT_PTRACED) && \ 412 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ 413 security_ptrace(current,task) == 0)) 414 415 static int proc_pid_environ(struct task_struct *task, char * buffer) 416 { 417 int res = 0; 418 struct mm_struct *mm = get_task_mm(task); 419 if (mm) { 420 unsigned int len = mm->env_end - mm->env_start; 421 if (len > PAGE_SIZE) 422 len = PAGE_SIZE; 423 res = access_process_vm(task, mm->env_start, buffer, len, 0); 424 if (!ptrace_may_attach(task)) 425 res = -ESRCH; 426 mmput(mm); 427 } 428 return res; 429 } 430 431 static int proc_pid_cmdline(struct task_struct *task, char * buffer) 432 { 433 int res = 0; 434 unsigned int len; 435 struct mm_struct *mm = get_task_mm(task); 436 if (!mm) 437 goto out; 438 if (!mm->arg_end) 439 goto out_mm; /* Shh! No looking before we're done */ 440 441 len = mm->arg_end - mm->arg_start; 442 443 if (len > PAGE_SIZE) 444 len = PAGE_SIZE; 445 446 res = access_process_vm(task, mm->arg_start, buffer, len, 0); 447 448 // If the nul at the end of args has been overwritten, then 449 // assume application is using setproctitle(3). 
450 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { 451 len = strnlen(buffer, res); 452 if (len < res) { 453 res = len; 454 } else { 455 len = mm->env_end - mm->env_start; 456 if (len > PAGE_SIZE - res) 457 len = PAGE_SIZE - res; 458 res += access_process_vm(task, mm->env_start, buffer+res, len, 0); 459 res = strnlen(buffer, res); 460 } 461 } 462 out_mm: 463 mmput(mm); 464 out: 465 return res; 466 } 467 468 static int proc_pid_auxv(struct task_struct *task, char *buffer) 469 { 470 int res = 0; 471 struct mm_struct *mm = get_task_mm(task); 472 if (mm) { 473 unsigned int nwords = 0; 474 do 475 nwords += 2; 476 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 477 res = nwords * sizeof(mm->saved_auxv[0]); 478 if (res > PAGE_SIZE) 479 res = PAGE_SIZE; 480 memcpy(buffer, mm->saved_auxv, res); 481 mmput(mm); 482 } 483 return res; 484 } 485 486 487 #ifdef CONFIG_KALLSYMS 488 /* 489 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 490 * Returns the resolved symbol. If that fails, simply return the address. 
491 */ 492 static int proc_pid_wchan(struct task_struct *task, char *buffer) 493 { 494 char *modname; 495 const char *sym_name; 496 unsigned long wchan, size, offset; 497 char namebuf[KSYM_NAME_LEN+1]; 498 499 wchan = get_wchan(task); 500 501 sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); 502 if (sym_name) 503 return sprintf(buffer, "%s", sym_name); 504 return sprintf(buffer, "%lu", wchan); 505 } 506 #endif /* CONFIG_KALLSYMS */ 507 508 #ifdef CONFIG_SCHEDSTATS 509 /* 510 * Provides /proc/PID/schedstat 511 */ 512 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 513 { 514 return sprintf(buffer, "%lu %lu %lu\n", 515 task->sched_info.cpu_time, 516 task->sched_info.run_delay, 517 task->sched_info.pcnt); 518 } 519 #endif 520 521 /* The badness from the OOM killer */ 522 unsigned long badness(struct task_struct *p, unsigned long uptime); 523 static int proc_oom_score(struct task_struct *task, char *buffer) 524 { 525 unsigned long points; 526 struct timespec uptime; 527 528 do_posix_clock_monotonic_gettime(&uptime); 529 points = badness(task, uptime.tv_sec); 530 return sprintf(buffer, "%lu\n", points); 531 } 532 533 /************************************************************************/ 534 /* Here the fs part begins */ 535 /************************************************************************/ 536 537 /* permission checks */ 538 static int proc_fd_access_allowed(struct inode *inode) 539 { 540 struct task_struct *task; 541 int allowed = 0; 542 /* Allow access to a task's file descriptors if it is us or we 543 * may use ptrace attach to the process and find out that 544 * information. 
545 */ 546 task = get_proc_task(inode); 547 if (task) { 548 allowed = ptrace_may_attach(task); 549 put_task_struct(task); 550 } 551 return allowed; 552 } 553 554 static int proc_setattr(struct dentry *dentry, struct iattr *attr) 555 { 556 int error; 557 struct inode *inode = dentry->d_inode; 558 559 if (attr->ia_valid & ATTR_MODE) 560 return -EPERM; 561 562 error = inode_change_ok(inode, attr); 563 if (!error) { 564 error = security_inode_setattr(dentry, attr); 565 if (!error) 566 error = inode_setattr(inode, attr); 567 } 568 return error; 569 } 570 571 static struct inode_operations proc_def_inode_operations = { 572 .setattr = proc_setattr, 573 }; 574 575 extern struct seq_operations mounts_op; 576 struct proc_mounts { 577 struct seq_file m; 578 int event; 579 }; 580 581 static int mounts_open(struct inode *inode, struct file *file) 582 { 583 struct task_struct *task = get_proc_task(inode); 584 struct namespace *namespace = NULL; 585 struct proc_mounts *p; 586 int ret = -EINVAL; 587 588 if (task) { 589 task_lock(task); 590 namespace = task->namespace; 591 if (namespace) 592 get_namespace(namespace); 593 task_unlock(task); 594 put_task_struct(task); 595 } 596 597 if (namespace) { 598 ret = -ENOMEM; 599 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); 600 if (p) { 601 file->private_data = &p->m; 602 ret = seq_open(file, &mounts_op); 603 if (!ret) { 604 p->m.private = namespace; 605 p->event = namespace->event; 606 return 0; 607 } 608 kfree(p); 609 } 610 put_namespace(namespace); 611 } 612 return ret; 613 } 614 615 static int mounts_release(struct inode *inode, struct file *file) 616 { 617 struct seq_file *m = file->private_data; 618 struct namespace *namespace = m->private; 619 put_namespace(namespace); 620 return seq_release(inode, file); 621 } 622 623 static unsigned mounts_poll(struct file *file, poll_table *wait) 624 { 625 struct proc_mounts *p = file->private_data; 626 struct namespace *ns = p->m.private; 627 unsigned res = 0; 628 629 poll_wait(file, 
&ns->poll, wait); 630 631 spin_lock(&vfsmount_lock); 632 if (p->event != ns->event) { 633 p->event = ns->event; 634 res = POLLERR; 635 } 636 spin_unlock(&vfsmount_lock); 637 638 return res; 639 } 640 641 static struct file_operations proc_mounts_operations = { 642 .open = mounts_open, 643 .read = seq_read, 644 .llseek = seq_lseek, 645 .release = mounts_release, 646 .poll = mounts_poll, 647 }; 648 649 extern struct seq_operations mountstats_op; 650 static int mountstats_open(struct inode *inode, struct file *file) 651 { 652 int ret = seq_open(file, &mountstats_op); 653 654 if (!ret) { 655 struct seq_file *m = file->private_data; 656 struct namespace *namespace = NULL; 657 struct task_struct *task = get_proc_task(inode); 658 659 if (task) { 660 task_lock(task); 661 namespace = task->namespace; 662 if (namespace) 663 get_namespace(namespace); 664 task_unlock(task); 665 put_task_struct(task); 666 } 667 668 if (namespace) 669 m->private = namespace; 670 else { 671 seq_release(inode, file); 672 ret = -EINVAL; 673 } 674 } 675 return ret; 676 } 677 678 static struct file_operations proc_mountstats_operations = { 679 .open = mountstats_open, 680 .read = seq_read, 681 .llseek = seq_lseek, 682 .release = mounts_release, 683 }; 684 685 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 686 687 static ssize_t proc_info_read(struct file * file, char __user * buf, 688 size_t count, loff_t *ppos) 689 { 690 struct inode * inode = file->f_dentry->d_inode; 691 unsigned long page; 692 ssize_t length; 693 struct task_struct *task = get_proc_task(inode); 694 695 length = -ESRCH; 696 if (!task) 697 goto out_no_task; 698 699 if (count > PROC_BLOCK_SIZE) 700 count = PROC_BLOCK_SIZE; 701 702 length = -ENOMEM; 703 if (!(page = __get_free_page(GFP_KERNEL))) 704 goto out; 705 706 length = PROC_I(inode)->op.proc_read(task, (char*)page); 707 708 if (length >= 0) 709 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 710 
free_page(page); 711 out: 712 put_task_struct(task); 713 out_no_task: 714 return length; 715 } 716 717 static struct file_operations proc_info_file_operations = { 718 .read = proc_info_read, 719 }; 720 721 static int mem_open(struct inode* inode, struct file* file) 722 { 723 file->private_data = (void*)((long)current->self_exec_id); 724 return 0; 725 } 726 727 static ssize_t mem_read(struct file * file, char __user * buf, 728 size_t count, loff_t *ppos) 729 { 730 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 731 char *page; 732 unsigned long src = *ppos; 733 int ret = -ESRCH; 734 struct mm_struct *mm; 735 736 if (!task) 737 goto out_no_task; 738 739 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 740 goto out; 741 742 ret = -ENOMEM; 743 page = (char *)__get_free_page(GFP_USER); 744 if (!page) 745 goto out; 746 747 ret = 0; 748 749 mm = get_task_mm(task); 750 if (!mm) 751 goto out_free; 752 753 ret = -EIO; 754 755 if (file->private_data != (void*)((long)current->self_exec_id)) 756 goto out_put; 757 758 ret = 0; 759 760 while (count > 0) { 761 int this_len, retval; 762 763 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 764 retval = access_process_vm(task, src, page, this_len, 0); 765 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 766 if (!ret) 767 ret = -EIO; 768 break; 769 } 770 771 if (copy_to_user(buf, page, retval)) { 772 ret = -EFAULT; 773 break; 774 } 775 776 ret += retval; 777 src += retval; 778 buf += retval; 779 count -= retval; 780 } 781 *ppos = src; 782 783 out_put: 784 mmput(mm); 785 out_free: 786 free_page((unsigned long) page); 787 out: 788 put_task_struct(task); 789 out_no_task: 790 return ret; 791 } 792 793 #define mem_write NULL 794 795 #ifndef mem_write 796 /* This is a security hazard */ 797 static ssize_t mem_write(struct file * file, const char * buf, 798 size_t count, loff_t *ppos) 799 { 800 int copied = 0; 801 char *page; 802 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 803 unsigned long dst = *ppos; 804 805 copied = -ESRCH; 806 if (!task) 807 goto out_no_task; 808 809 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 810 goto out; 811 812 copied = -ENOMEM; 813 page = (char *)__get_free_page(GFP_USER); 814 if (!page) 815 goto out; 816 817 while (count > 0) { 818 int this_len, retval; 819 820 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 821 if (copy_from_user(page, buf, this_len)) { 822 copied = -EFAULT; 823 break; 824 } 825 retval = access_process_vm(task, dst, page, this_len, 1); 826 if (!retval) { 827 if (!copied) 828 copied = -EIO; 829 break; 830 } 831 copied += retval; 832 buf += retval; 833 dst += retval; 834 count -= retval; 835 } 836 *ppos = dst; 837 free_page((unsigned long) page); 838 out: 839 put_task_struct(task); 840 out_no_task: 841 return copied; 842 } 843 #endif 844 845 static loff_t mem_lseek(struct file * file, loff_t offset, int orig) 846 { 847 switch (orig) { 848 case 0: 849 file->f_pos = offset; 850 break; 851 case 1: 852 file->f_pos += offset; 853 break; 854 default: 855 return -EINVAL; 856 } 857 force_successful_syscall_return(); 858 return file->f_pos; 859 } 860 861 static struct file_operations proc_mem_operations = { 862 .llseek = mem_lseek, 863 .read = mem_read, 864 .write = mem_write, 865 .open = mem_open, 866 }; 867 868 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 869 size_t count, loff_t *ppos) 870 { 871 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 872 char buffer[PROC_NUMBUF]; 873 size_t len; 874 int oom_adjust; 875 loff_t __ppos = *ppos; 876 877 if (!task) 878 return -ESRCH; 879 oom_adjust = task->oomkilladj; 880 put_task_struct(task); 881 882 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 883 if (__ppos >= len) 884 return 0; 885 if (count > len-__ppos) 886 count = len-__ppos; 887 if (copy_to_user(buf, buffer + __ppos, count)) 888 return -EFAULT; 889 *ppos = __ppos + count; 890 return count; 891 } 892 893 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 894 size_t count, loff_t *ppos) 895 { 896 struct task_struct *task; 897 char buffer[PROC_NUMBUF], *end; 898 int oom_adjust; 899 900 if (!capable(CAP_SYS_RESOURCE)) 901 return -EPERM; 902 memset(buffer, 0, sizeof(buffer)); 903 if (count > sizeof(buffer) - 1) 904 count = sizeof(buffer) - 1; 905 if 
(copy_from_user(buffer, buf, count)) 906 return -EFAULT; 907 oom_adjust = simple_strtol(buffer, &end, 0); 908 if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE) 909 return -EINVAL; 910 if (*end == '\n') 911 end++; 912 task = get_proc_task(file->f_dentry->d_inode); 913 if (!task) 914 return -ESRCH; 915 task->oomkilladj = oom_adjust; 916 put_task_struct(task); 917 if (end - buffer == 0) 918 return -EIO; 919 return end - buffer; 920 } 921 922 static struct file_operations proc_oom_adjust_operations = { 923 .read = oom_adjust_read, 924 .write = oom_adjust_write, 925 }; 926 927 #ifdef CONFIG_AUDITSYSCALL 928 #define TMPBUFLEN 21 929 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 930 size_t count, loff_t *ppos) 931 { 932 struct inode * inode = file->f_dentry->d_inode; 933 struct task_struct *task = get_proc_task(inode); 934 ssize_t length; 935 char tmpbuf[TMPBUFLEN]; 936 937 if (!task) 938 return -ESRCH; 939 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 940 audit_get_loginuid(task->audit_context)); 941 put_task_struct(task); 942 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 943 } 944 945 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 946 size_t count, loff_t *ppos) 947 { 948 struct inode * inode = file->f_dentry->d_inode; 949 char *page, *tmp; 950 ssize_t length; 951 uid_t loginuid; 952 953 if (!capable(CAP_AUDIT_CONTROL)) 954 return -EPERM; 955 956 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) 957 return -EPERM; 958 959 if (count >= PAGE_SIZE) 960 count = PAGE_SIZE - 1; 961 962 if (*ppos != 0) { 963 /* No partial writes. 
*/ 964 return -EINVAL; 965 } 966 page = (char*)__get_free_page(GFP_USER); 967 if (!page) 968 return -ENOMEM; 969 length = -EFAULT; 970 if (copy_from_user(page, buf, count)) 971 goto out_free_page; 972 973 page[count] = '\0'; 974 loginuid = simple_strtoul(page, &tmp, 10); 975 if (tmp == page) { 976 length = -EINVAL; 977 goto out_free_page; 978 979 } 980 length = audit_set_loginuid(current, loginuid); 981 if (likely(length == 0)) 982 length = count; 983 984 out_free_page: 985 free_page((unsigned long) page); 986 return length; 987 } 988 989 static struct file_operations proc_loginuid_operations = { 990 .read = proc_loginuid_read, 991 .write = proc_loginuid_write, 992 }; 993 #endif 994 995 #ifdef CONFIG_SECCOMP 996 static ssize_t seccomp_read(struct file *file, char __user *buf, 997 size_t count, loff_t *ppos) 998 { 999 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); 1000 char __buf[20]; 1001 loff_t __ppos = *ppos; 1002 size_t len; 1003 1004 if (!tsk) 1005 return -ESRCH; 1006 /* no need to print the trailing zero, so use only len */ 1007 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 1008 put_task_struct(tsk); 1009 if (__ppos >= len) 1010 return 0; 1011 if (count > len - __ppos) 1012 count = len - __ppos; 1013 if (copy_to_user(buf, __buf + __ppos, count)) 1014 return -EFAULT; 1015 *ppos = __ppos + count; 1016 return count; 1017 } 1018 1019 static ssize_t seccomp_write(struct file *file, const char __user *buf, 1020 size_t count, loff_t *ppos) 1021 { 1022 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); 1023 char __buf[20], *end; 1024 unsigned int seccomp_mode; 1025 ssize_t result; 1026 1027 result = -ESRCH; 1028 if (!tsk) 1029 goto out_no_task; 1030 1031 /* can set it only once to be even more secure */ 1032 result = -EPERM; 1033 if (unlikely(tsk->seccomp.mode)) 1034 goto out; 1035 1036 result = -EFAULT; 1037 memset(__buf, 0, sizeof(__buf)); 1038 count = min(count, sizeof(__buf) - 1); 1039 if (copy_from_user(__buf, buf, count)) 
1040 goto out; 1041 1042 seccomp_mode = simple_strtoul(__buf, &end, 0); 1043 if (*end == '\n') 1044 end++; 1045 result = -EINVAL; 1046 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1047 tsk->seccomp.mode = seccomp_mode; 1048 set_tsk_thread_flag(tsk, TIF_SECCOMP); 1049 } else 1050 goto out; 1051 result = -EIO; 1052 if (unlikely(!(end - __buf))) 1053 goto out; 1054 result = end - __buf; 1055 out: 1056 put_task_struct(tsk); 1057 out_no_task: 1058 return result; 1059 } 1060 1061 static struct file_operations proc_seccomp_operations = { 1062 .read = seccomp_read, 1063 .write = seccomp_write, 1064 }; 1065 #endif /* CONFIG_SECCOMP */ 1066 1067 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1068 { 1069 struct inode *inode = dentry->d_inode; 1070 int error = -EACCES; 1071 1072 /* We don't need a base pointer in the /proc filesystem */ 1073 path_release(nd); 1074 1075 /* Are we allowed to snoop on the tasks file descriptors? */ 1076 if (!proc_fd_access_allowed(inode)) 1077 goto out; 1078 1079 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1080 nd->last_type = LAST_BIND; 1081 out: 1082 return ERR_PTR(error); 1083 } 1084 1085 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, 1086 char __user *buffer, int buflen) 1087 { 1088 struct inode * inode; 1089 char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; 1090 int len; 1091 1092 if (!tmp) 1093 return -ENOMEM; 1094 1095 inode = dentry->d_inode; 1096 path = d_path(dentry, mnt, tmp, PAGE_SIZE); 1097 len = PTR_ERR(path); 1098 if (IS_ERR(path)) 1099 goto out; 1100 len = tmp + PAGE_SIZE - 1 - path; 1101 1102 if (len > buflen) 1103 len = buflen; 1104 if (copy_to_user(buffer, path, len)) 1105 len = -EFAULT; 1106 out: 1107 free_page((unsigned long)tmp); 1108 return len; 1109 } 1110 1111 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1112 { 1113 int error = -EACCES; 1114 struct inode *inode = dentry->d_inode; 
1115 struct dentry *de; 1116 struct vfsmount *mnt = NULL; 1117 1118 /* Are we allowed to snoop on the tasks file descriptors? */ 1119 if (!proc_fd_access_allowed(inode)) 1120 goto out; 1121 1122 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1123 if (error) 1124 goto out; 1125 1126 error = do_proc_readlink(de, mnt, buffer, buflen); 1127 dput(de); 1128 mntput(mnt); 1129 out: 1130 return error; 1131 } 1132 1133 static struct inode_operations proc_pid_link_inode_operations = { 1134 .readlink = proc_pid_readlink, 1135 .follow_link = proc_pid_follow_link, 1136 .setattr = proc_setattr, 1137 }; 1138 1139 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1140 { 1141 struct dentry *dentry = filp->f_dentry; 1142 struct inode *inode = dentry->d_inode; 1143 struct task_struct *p = get_proc_task(inode); 1144 unsigned int fd, tid, ino; 1145 int retval; 1146 char buf[PROC_NUMBUF]; 1147 struct files_struct * files; 1148 struct fdtable *fdt; 1149 1150 retval = -ENOENT; 1151 if (!p) 1152 goto out_no_task; 1153 retval = 0; 1154 tid = p->pid; 1155 1156 fd = filp->f_pos; 1157 switch (fd) { 1158 case 0: 1159 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 1160 goto out; 1161 filp->f_pos++; 1162 case 1: 1163 ino = parent_ino(dentry); 1164 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1165 goto out; 1166 filp->f_pos++; 1167 default: 1168 files = get_files_struct(p); 1169 if (!files) 1170 goto out; 1171 rcu_read_lock(); 1172 fdt = files_fdtable(files); 1173 for (fd = filp->f_pos-2; 1174 fd < fdt->max_fds; 1175 fd++, filp->f_pos++) { 1176 unsigned int i,j; 1177 1178 if (!fcheck_files(files, fd)) 1179 continue; 1180 rcu_read_unlock(); 1181 1182 j = PROC_NUMBUF; 1183 i = fd; 1184 do { 1185 j--; 1186 buf[j] = '0' + (i % 10); 1187 i /= 10; 1188 } while (i); 1189 1190 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1191 if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1192 rcu_read_lock(); 1193 break; 1194 } 1195 rcu_read_lock(); 
1196 } 1197 rcu_read_unlock(); 1198 put_files_struct(files); 1199 } 1200 out: 1201 put_task_struct(p); 1202 out_no_task: 1203 return retval; 1204 } 1205 1206 static int proc_pident_readdir(struct file *filp, 1207 void *dirent, filldir_t filldir, 1208 struct pid_entry *ents, unsigned int nents) 1209 { 1210 int i; 1211 int pid; 1212 struct dentry *dentry = filp->f_dentry; 1213 struct inode *inode = dentry->d_inode; 1214 struct task_struct *task = get_proc_task(inode); 1215 struct pid_entry *p; 1216 ino_t ino; 1217 int ret; 1218 1219 ret = -ENOENT; 1220 if (!task) 1221 goto out; 1222 1223 ret = 0; 1224 pid = task->pid; 1225 put_task_struct(task); 1226 i = filp->f_pos; 1227 switch (i) { 1228 case 0: 1229 ino = inode->i_ino; 1230 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 1231 goto out; 1232 i++; 1233 filp->f_pos++; 1234 /* fall through */ 1235 case 1: 1236 ino = parent_ino(dentry); 1237 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 1238 goto out; 1239 i++; 1240 filp->f_pos++; 1241 /* fall through */ 1242 default: 1243 i -= 2; 1244 if (i >= nents) { 1245 ret = 1; 1246 goto out; 1247 } 1248 p = ents + i; 1249 while (p->name) { 1250 if (filldir(dirent, p->name, p->len, filp->f_pos, 1251 fake_ino(pid, p->type), p->mode >> 12) < 0) 1252 goto out; 1253 filp->f_pos++; 1254 p++; 1255 } 1256 } 1257 1258 ret = 1; 1259 out: 1260 return ret; 1261 } 1262 1263 static int proc_tgid_base_readdir(struct file * filp, 1264 void * dirent, filldir_t filldir) 1265 { 1266 return proc_pident_readdir(filp,dirent,filldir, 1267 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 1268 } 1269 1270 static int proc_tid_base_readdir(struct file * filp, 1271 void * dirent, filldir_t filldir) 1272 { 1273 return proc_pident_readdir(filp,dirent,filldir, 1274 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 1275 } 1276 1277 /* building an inode */ 1278 1279 static int task_dumpable(struct task_struct *task) 1280 { 1281 int dumpable = 0; 1282 struct mm_struct *mm; 1283 1284 task_lock(task); 1285 mm = 
task->mm; 1286 if (mm) 1287 dumpable = mm->dumpable; 1288 task_unlock(task); 1289 if(dumpable == 1) 1290 return 1; 1291 return 0; 1292 } 1293 1294 1295 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) 1296 { 1297 struct inode * inode; 1298 struct proc_inode *ei; 1299 1300 /* We need a new inode */ 1301 1302 inode = new_inode(sb); 1303 if (!inode) 1304 goto out; 1305 1306 /* Common stuff */ 1307 ei = PROC_I(inode); 1308 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1309 inode->i_ino = fake_ino(task->pid, ino); 1310 inode->i_op = &proc_def_inode_operations; 1311 1312 /* 1313 * grab the reference to task. 1314 */ 1315 ei->pid = get_pid(task->pids[PIDTYPE_PID].pid); 1316 if (!ei->pid) 1317 goto out_unlock; 1318 1319 inode->i_uid = 0; 1320 inode->i_gid = 0; 1321 if (task_dumpable(task)) { 1322 inode->i_uid = task->euid; 1323 inode->i_gid = task->egid; 1324 } 1325 security_task_to_inode(task, inode); 1326 1327 out: 1328 return inode; 1329 1330 out_unlock: 1331 iput(inode); 1332 return NULL; 1333 } 1334 1335 /* dentry stuff */ 1336 1337 /* 1338 * Exceptional case: normally we are not allowed to unhash a busy 1339 * directory. In this case, however, we can do it - no aliasing problems 1340 * due to the way we treat inodes. 1341 * 1342 * Rewrite the inode's ownerships here because the owning task may have 1343 * performed a setuid(), etc. 1344 * 1345 * Before the /proc/pid/status file was created the only way to read 1346 * the effective uid of a /process was to stat /proc/pid. Reading 1347 * /proc/pid/status is slow enough that procps and other packages 1348 * kept stating /proc/pid. To keep the rules in /proc simple I have 1349 * made this apply to all per process world readable and executable 1350 * directories. 
1351 */ 1352 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1353 { 1354 struct inode *inode = dentry->d_inode; 1355 struct task_struct *task = get_proc_task(inode); 1356 if (task) { 1357 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1358 task_dumpable(task)) { 1359 inode->i_uid = task->euid; 1360 inode->i_gid = task->egid; 1361 } else { 1362 inode->i_uid = 0; 1363 inode->i_gid = 0; 1364 } 1365 inode->i_mode &= ~(S_ISUID | S_ISGID); 1366 security_task_to_inode(task, inode); 1367 put_task_struct(task); 1368 return 1; 1369 } 1370 d_drop(dentry); 1371 return 0; 1372 } 1373 1374 static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1375 { 1376 struct inode *inode = dentry->d_inode; 1377 struct task_struct *task; 1378 generic_fillattr(inode, stat); 1379 1380 rcu_read_lock(); 1381 stat->uid = 0; 1382 stat->gid = 0; 1383 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1384 if (task) { 1385 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1386 task_dumpable(task)) { 1387 stat->uid = task->euid; 1388 stat->gid = task->egid; 1389 } 1390 } 1391 rcu_read_unlock(); 1392 return 0; 1393 } 1394 1395 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1396 { 1397 struct inode *inode = dentry->d_inode; 1398 struct task_struct *task = get_proc_task(inode); 1399 int fd = proc_fd(inode); 1400 struct files_struct *files; 1401 1402 if (task) { 1403 files = get_files_struct(task); 1404 if (files) { 1405 rcu_read_lock(); 1406 if (fcheck_files(files, fd)) { 1407 rcu_read_unlock(); 1408 put_files_struct(files); 1409 if (task_dumpable(task)) { 1410 inode->i_uid = task->euid; 1411 inode->i_gid = task->egid; 1412 } else { 1413 inode->i_uid = 0; 1414 inode->i_gid = 0; 1415 } 1416 inode->i_mode &= ~(S_ISUID | S_ISGID); 1417 security_task_to_inode(task, inode); 1418 put_task_struct(task); 1419 return 1; 1420 } 1421 rcu_read_unlock(); 1422 put_files_struct(files); 1423 } 1424 put_task_struct(task); 1425 } 1426 
d_drop(dentry); 1427 return 0; 1428 } 1429 1430 static int pid_delete_dentry(struct dentry * dentry) 1431 { 1432 /* Is the task we represent dead? 1433 * If so, then don't put the dentry on the lru list, 1434 * kill it immediately. 1435 */ 1436 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1437 } 1438 1439 static struct dentry_operations tid_fd_dentry_operations = 1440 { 1441 .d_revalidate = tid_fd_revalidate, 1442 .d_delete = pid_delete_dentry, 1443 }; 1444 1445 static struct dentry_operations pid_dentry_operations = 1446 { 1447 .d_revalidate = pid_revalidate, 1448 .d_delete = pid_delete_dentry, 1449 }; 1450 1451 /* Lookups */ 1452 1453 static unsigned name_to_int(struct dentry *dentry) 1454 { 1455 const char *name = dentry->d_name.name; 1456 int len = dentry->d_name.len; 1457 unsigned n = 0; 1458 1459 if (len > 1 && *name == '0') 1460 goto out; 1461 while (len-- > 0) { 1462 unsigned c = *name++ - '0'; 1463 if (c > 9) 1464 goto out; 1465 if (n >= (~0U-9)/10) 1466 goto out; 1467 n *= 10; 1468 n += c; 1469 } 1470 return n; 1471 out: 1472 return ~0U; 1473 } 1474 1475 /* SMP-safe */ 1476 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1477 { 1478 struct task_struct *task = get_proc_task(dir); 1479 unsigned fd = name_to_int(dentry); 1480 struct dentry *result = ERR_PTR(-ENOENT); 1481 struct file * file; 1482 struct files_struct * files; 1483 struct inode *inode; 1484 struct proc_inode *ei; 1485 1486 if (!task) 1487 goto out_no_task; 1488 if (fd == ~0U) 1489 goto out; 1490 1491 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1492 if (!inode) 1493 goto out; 1494 ei = PROC_I(inode); 1495 ei->fd = fd; 1496 files = get_files_struct(task); 1497 if (!files) 1498 goto out_unlock; 1499 inode->i_mode = S_IFLNK; 1500 1501 /* 1502 * We are not taking a ref to the file structure, so we must 1503 * hold ->file_lock. 
1504 */ 1505 spin_lock(&files->file_lock); 1506 file = fcheck_files(files, fd); 1507 if (!file) 1508 goto out_unlock2; 1509 if (file->f_mode & 1) 1510 inode->i_mode |= S_IRUSR | S_IXUSR; 1511 if (file->f_mode & 2) 1512 inode->i_mode |= S_IWUSR | S_IXUSR; 1513 spin_unlock(&files->file_lock); 1514 put_files_struct(files); 1515 inode->i_op = &proc_pid_link_inode_operations; 1516 inode->i_size = 64; 1517 ei->op.proc_get_link = proc_fd_link; 1518 dentry->d_op = &tid_fd_dentry_operations; 1519 d_add(dentry, inode); 1520 /* Close the race of the process dying before we return the dentry */ 1521 if (tid_fd_revalidate(dentry, NULL)) 1522 result = NULL; 1523 out: 1524 put_task_struct(task); 1525 out_no_task: 1526 return result; 1527 1528 out_unlock2: 1529 spin_unlock(&files->file_lock); 1530 put_files_struct(files); 1531 out_unlock: 1532 iput(inode); 1533 goto out; 1534 } 1535 1536 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1537 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1538 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 1539 1540 static struct file_operations proc_fd_operations = { 1541 .read = generic_read_dir, 1542 .readdir = proc_readfd, 1543 }; 1544 1545 static struct file_operations proc_task_operations = { 1546 .read = generic_read_dir, 1547 .readdir = proc_task_readdir, 1548 }; 1549 1550 /* 1551 * proc directories can do almost nothing.. 
1552 */ 1553 static struct inode_operations proc_fd_inode_operations = { 1554 .lookup = proc_lookupfd, 1555 .setattr = proc_setattr, 1556 }; 1557 1558 static struct inode_operations proc_task_inode_operations = { 1559 .lookup = proc_task_lookup, 1560 .getattr = proc_task_getattr, 1561 .setattr = proc_setattr, 1562 }; 1563 1564 #ifdef CONFIG_SECURITY 1565 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 1566 size_t count, loff_t *ppos) 1567 { 1568 struct inode * inode = file->f_dentry->d_inode; 1569 unsigned long page; 1570 ssize_t length; 1571 struct task_struct *task = get_proc_task(inode); 1572 1573 length = -ESRCH; 1574 if (!task) 1575 goto out_no_task; 1576 1577 if (count > PAGE_SIZE) 1578 count = PAGE_SIZE; 1579 length = -ENOMEM; 1580 if (!(page = __get_free_page(GFP_KERNEL))) 1581 goto out; 1582 1583 length = security_getprocattr(task, 1584 (char*)file->f_dentry->d_name.name, 1585 (void*)page, count); 1586 if (length >= 0) 1587 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1588 free_page(page); 1589 out: 1590 put_task_struct(task); 1591 out_no_task: 1592 return length; 1593 } 1594 1595 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 1596 size_t count, loff_t *ppos) 1597 { 1598 struct inode * inode = file->f_dentry->d_inode; 1599 char *page; 1600 ssize_t length; 1601 struct task_struct *task = get_proc_task(inode); 1602 1603 length = -ESRCH; 1604 if (!task) 1605 goto out_no_task; 1606 if (count > PAGE_SIZE) 1607 count = PAGE_SIZE; 1608 1609 /* No partial writes. 
*/ 1610 length = -EINVAL; 1611 if (*ppos != 0) 1612 goto out; 1613 1614 length = -ENOMEM; 1615 page = (char*)__get_free_page(GFP_USER); 1616 if (!page) 1617 goto out; 1618 1619 length = -EFAULT; 1620 if (copy_from_user(page, buf, count)) 1621 goto out_free; 1622 1623 length = security_setprocattr(task, 1624 (char*)file->f_dentry->d_name.name, 1625 (void*)page, count); 1626 out_free: 1627 free_page((unsigned long) page); 1628 out: 1629 put_task_struct(task); 1630 out_no_task: 1631 return length; 1632 } 1633 1634 static struct file_operations proc_pid_attr_operations = { 1635 .read = proc_pid_attr_read, 1636 .write = proc_pid_attr_write, 1637 }; 1638 1639 static struct file_operations proc_tid_attr_operations; 1640 static struct inode_operations proc_tid_attr_inode_operations; 1641 static struct file_operations proc_tgid_attr_operations; 1642 static struct inode_operations proc_tgid_attr_inode_operations; 1643 #endif 1644 1645 /* SMP-safe */ 1646 static struct dentry *proc_pident_lookup(struct inode *dir, 1647 struct dentry *dentry, 1648 struct pid_entry *ents) 1649 { 1650 struct inode *inode; 1651 struct dentry *error; 1652 struct task_struct *task = get_proc_task(dir); 1653 struct pid_entry *p; 1654 struct proc_inode *ei; 1655 1656 error = ERR_PTR(-ENOENT); 1657 inode = NULL; 1658 1659 if (!task) 1660 goto out_no_task; 1661 1662 for (p = ents; p->name; p++) { 1663 if (p->len != dentry->d_name.len) 1664 continue; 1665 if (!memcmp(dentry->d_name.name, p->name, p->len)) 1666 break; 1667 } 1668 if (!p->name) 1669 goto out; 1670 1671 error = ERR_PTR(-EINVAL); 1672 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1673 if (!inode) 1674 goto out; 1675 1676 ei = PROC_I(inode); 1677 inode->i_mode = p->mode; 1678 /* 1679 * Yes, it does not scale. And it should not. Don't add 1680 * new entries into /proc/<tgid>/ without very good reasons. 
1681 */ 1682 switch(p->type) { 1683 case PROC_TGID_TASK: 1684 inode->i_nlink = 2; 1685 inode->i_op = &proc_task_inode_operations; 1686 inode->i_fop = &proc_task_operations; 1687 break; 1688 case PROC_TID_FD: 1689 case PROC_TGID_FD: 1690 inode->i_nlink = 2; 1691 inode->i_op = &proc_fd_inode_operations; 1692 inode->i_fop = &proc_fd_operations; 1693 break; 1694 case PROC_TID_EXE: 1695 case PROC_TGID_EXE: 1696 inode->i_op = &proc_pid_link_inode_operations; 1697 ei->op.proc_get_link = proc_exe_link; 1698 break; 1699 case PROC_TID_CWD: 1700 case PROC_TGID_CWD: 1701 inode->i_op = &proc_pid_link_inode_operations; 1702 ei->op.proc_get_link = proc_cwd_link; 1703 break; 1704 case PROC_TID_ROOT: 1705 case PROC_TGID_ROOT: 1706 inode->i_op = &proc_pid_link_inode_operations; 1707 ei->op.proc_get_link = proc_root_link; 1708 break; 1709 case PROC_TID_ENVIRON: 1710 case PROC_TGID_ENVIRON: 1711 inode->i_fop = &proc_info_file_operations; 1712 ei->op.proc_read = proc_pid_environ; 1713 break; 1714 case PROC_TID_AUXV: 1715 case PROC_TGID_AUXV: 1716 inode->i_fop = &proc_info_file_operations; 1717 ei->op.proc_read = proc_pid_auxv; 1718 break; 1719 case PROC_TID_STATUS: 1720 case PROC_TGID_STATUS: 1721 inode->i_fop = &proc_info_file_operations; 1722 ei->op.proc_read = proc_pid_status; 1723 break; 1724 case PROC_TID_STAT: 1725 inode->i_fop = &proc_info_file_operations; 1726 ei->op.proc_read = proc_tid_stat; 1727 break; 1728 case PROC_TGID_STAT: 1729 inode->i_fop = &proc_info_file_operations; 1730 ei->op.proc_read = proc_tgid_stat; 1731 break; 1732 case PROC_TID_CMDLINE: 1733 case PROC_TGID_CMDLINE: 1734 inode->i_fop = &proc_info_file_operations; 1735 ei->op.proc_read = proc_pid_cmdline; 1736 break; 1737 case PROC_TID_STATM: 1738 case PROC_TGID_STATM: 1739 inode->i_fop = &proc_info_file_operations; 1740 ei->op.proc_read = proc_pid_statm; 1741 break; 1742 case PROC_TID_MAPS: 1743 case PROC_TGID_MAPS: 1744 inode->i_fop = &proc_maps_operations; 1745 break; 1746 #ifdef CONFIG_NUMA 1747 case 
PROC_TID_NUMA_MAPS: 1748 case PROC_TGID_NUMA_MAPS: 1749 inode->i_fop = &proc_numa_maps_operations; 1750 break; 1751 #endif 1752 case PROC_TID_MEM: 1753 case PROC_TGID_MEM: 1754 inode->i_fop = &proc_mem_operations; 1755 break; 1756 #ifdef CONFIG_SECCOMP 1757 case PROC_TID_SECCOMP: 1758 case PROC_TGID_SECCOMP: 1759 inode->i_fop = &proc_seccomp_operations; 1760 break; 1761 #endif /* CONFIG_SECCOMP */ 1762 case PROC_TID_MOUNTS: 1763 case PROC_TGID_MOUNTS: 1764 inode->i_fop = &proc_mounts_operations; 1765 break; 1766 #ifdef CONFIG_MMU 1767 case PROC_TID_SMAPS: 1768 case PROC_TGID_SMAPS: 1769 inode->i_fop = &proc_smaps_operations; 1770 break; 1771 #endif 1772 case PROC_TID_MOUNTSTATS: 1773 case PROC_TGID_MOUNTSTATS: 1774 inode->i_fop = &proc_mountstats_operations; 1775 break; 1776 #ifdef CONFIG_SECURITY 1777 case PROC_TID_ATTR: 1778 inode->i_nlink = 2; 1779 inode->i_op = &proc_tid_attr_inode_operations; 1780 inode->i_fop = &proc_tid_attr_operations; 1781 break; 1782 case PROC_TGID_ATTR: 1783 inode->i_nlink = 2; 1784 inode->i_op = &proc_tgid_attr_inode_operations; 1785 inode->i_fop = &proc_tgid_attr_operations; 1786 break; 1787 case PROC_TID_ATTR_CURRENT: 1788 case PROC_TGID_ATTR_CURRENT: 1789 case PROC_TID_ATTR_PREV: 1790 case PROC_TGID_ATTR_PREV: 1791 case PROC_TID_ATTR_EXEC: 1792 case PROC_TGID_ATTR_EXEC: 1793 case PROC_TID_ATTR_FSCREATE: 1794 case PROC_TGID_ATTR_FSCREATE: 1795 case PROC_TID_ATTR_KEYCREATE: 1796 case PROC_TGID_ATTR_KEYCREATE: 1797 case PROC_TID_ATTR_SOCKCREATE: 1798 case PROC_TGID_ATTR_SOCKCREATE: 1799 inode->i_fop = &proc_pid_attr_operations; 1800 break; 1801 #endif 1802 #ifdef CONFIG_KALLSYMS 1803 case PROC_TID_WCHAN: 1804 case PROC_TGID_WCHAN: 1805 inode->i_fop = &proc_info_file_operations; 1806 ei->op.proc_read = proc_pid_wchan; 1807 break; 1808 #endif 1809 #ifdef CONFIG_SCHEDSTATS 1810 case PROC_TID_SCHEDSTAT: 1811 case PROC_TGID_SCHEDSTAT: 1812 inode->i_fop = &proc_info_file_operations; 1813 ei->op.proc_read = proc_pid_schedstat; 1814 break; 1815 
#endif 1816 #ifdef CONFIG_CPUSETS 1817 case PROC_TID_CPUSET: 1818 case PROC_TGID_CPUSET: 1819 inode->i_fop = &proc_cpuset_operations; 1820 break; 1821 #endif 1822 case PROC_TID_OOM_SCORE: 1823 case PROC_TGID_OOM_SCORE: 1824 inode->i_fop = &proc_info_file_operations; 1825 ei->op.proc_read = proc_oom_score; 1826 break; 1827 case PROC_TID_OOM_ADJUST: 1828 case PROC_TGID_OOM_ADJUST: 1829 inode->i_fop = &proc_oom_adjust_operations; 1830 break; 1831 #ifdef CONFIG_AUDITSYSCALL 1832 case PROC_TID_LOGINUID: 1833 case PROC_TGID_LOGINUID: 1834 inode->i_fop = &proc_loginuid_operations; 1835 break; 1836 #endif 1837 default: 1838 printk("procfs: impossible type (%d)",p->type); 1839 iput(inode); 1840 error = ERR_PTR(-EINVAL); 1841 goto out; 1842 } 1843 dentry->d_op = &pid_dentry_operations; 1844 d_add(dentry, inode); 1845 /* Close the race of the process dying before we return the dentry */ 1846 if (pid_revalidate(dentry, NULL)) 1847 error = NULL; 1848 out: 1849 put_task_struct(task); 1850 out_no_task: 1851 return error; 1852 } 1853 1854 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1855 return proc_pident_lookup(dir, dentry, tgid_base_stuff); 1856 } 1857 1858 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1859 return proc_pident_lookup(dir, dentry, tid_base_stuff); 1860 } 1861 1862 static struct file_operations proc_tgid_base_operations = { 1863 .read = generic_read_dir, 1864 .readdir = proc_tgid_base_readdir, 1865 }; 1866 1867 static struct file_operations proc_tid_base_operations = { 1868 .read = generic_read_dir, 1869 .readdir = proc_tid_base_readdir, 1870 }; 1871 1872 static struct inode_operations proc_tgid_base_inode_operations = { 1873 .lookup = proc_tgid_base_lookup, 1874 .getattr = pid_getattr, 1875 .setattr = proc_setattr, 1876 }; 1877 1878 static struct inode_operations proc_tid_base_inode_operations = { 1879 .lookup = proc_tid_base_lookup, 1880 
.getattr = pid_getattr, 1881 .setattr = proc_setattr, 1882 }; 1883 1884 #ifdef CONFIG_SECURITY 1885 static int proc_tgid_attr_readdir(struct file * filp, 1886 void * dirent, filldir_t filldir) 1887 { 1888 return proc_pident_readdir(filp,dirent,filldir, 1889 tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); 1890 } 1891 1892 static int proc_tid_attr_readdir(struct file * filp, 1893 void * dirent, filldir_t filldir) 1894 { 1895 return proc_pident_readdir(filp,dirent,filldir, 1896 tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); 1897 } 1898 1899 static struct file_operations proc_tgid_attr_operations = { 1900 .read = generic_read_dir, 1901 .readdir = proc_tgid_attr_readdir, 1902 }; 1903 1904 static struct file_operations proc_tid_attr_operations = { 1905 .read = generic_read_dir, 1906 .readdir = proc_tid_attr_readdir, 1907 }; 1908 1909 static struct dentry *proc_tgid_attr_lookup(struct inode *dir, 1910 struct dentry *dentry, struct nameidata *nd) 1911 { 1912 return proc_pident_lookup(dir, dentry, tgid_attr_stuff); 1913 } 1914 1915 static struct dentry *proc_tid_attr_lookup(struct inode *dir, 1916 struct dentry *dentry, struct nameidata *nd) 1917 { 1918 return proc_pident_lookup(dir, dentry, tid_attr_stuff); 1919 } 1920 1921 static struct inode_operations proc_tgid_attr_inode_operations = { 1922 .lookup = proc_tgid_attr_lookup, 1923 .getattr = pid_getattr, 1924 .setattr = proc_setattr, 1925 }; 1926 1927 static struct inode_operations proc_tid_attr_inode_operations = { 1928 .lookup = proc_tid_attr_lookup, 1929 .getattr = pid_getattr, 1930 .setattr = proc_setattr, 1931 }; 1932 #endif 1933 1934 /* 1935 * /proc/self: 1936 */ 1937 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1938 int buflen) 1939 { 1940 char tmp[PROC_NUMBUF]; 1941 sprintf(tmp, "%d", current->tgid); 1942 return vfs_readlink(dentry,buffer,buflen,tmp); 1943 } 1944 1945 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1946 { 1947 char tmp[PROC_NUMBUF]; 1948 
sprintf(tmp, "%d", current->tgid); 1949 return ERR_PTR(vfs_follow_link(nd,tmp)); 1950 } 1951 1952 static struct inode_operations proc_self_inode_operations = { 1953 .readlink = proc_self_readlink, 1954 .follow_link = proc_self_follow_link, 1955 }; 1956 1957 /** 1958 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 1959 * 1960 * @task: task that should be flushed. 1961 * 1962 * Looks in the dcache for 1963 * /proc/@pid 1964 * /proc/@tgid/task/@pid 1965 * if either directory is present flushes it and all of it'ts children 1966 * from the dcache. 1967 * 1968 * It is safe and reasonable to cache /proc entries for a task until 1969 * that task exits. After that they just clog up the dcache with 1970 * useless entries, possibly causing useful dcache entries to be 1971 * flushed instead. This routine is proved to flush those useless 1972 * dcache entries at process exit time. 1973 * 1974 * NOTE: This routine is just an optimization so it does not guarantee 1975 * that no dcache entries will exist at process exit time it 1976 * just makes it very unlikely that any will persist. 
1977 */ 1978 void proc_flush_task(struct task_struct *task) 1979 { 1980 struct dentry *dentry, *leader, *dir; 1981 char buf[PROC_NUMBUF]; 1982 struct qstr name; 1983 1984 name.name = buf; 1985 name.len = snprintf(buf, sizeof(buf), "%d", task->pid); 1986 dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); 1987 if (dentry) { 1988 shrink_dcache_parent(dentry); 1989 d_drop(dentry); 1990 dput(dentry); 1991 } 1992 1993 if (thread_group_leader(task)) 1994 goto out; 1995 1996 name.name = buf; 1997 name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); 1998 leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); 1999 if (!leader) 2000 goto out; 2001 2002 name.name = "task"; 2003 name.len = strlen(name.name); 2004 dir = d_hash_and_lookup(leader, &name); 2005 if (!dir) 2006 goto out_put_leader; 2007 2008 name.name = buf; 2009 name.len = snprintf(buf, sizeof(buf), "%d", task->pid); 2010 dentry = d_hash_and_lookup(dir, &name); 2011 if (dentry) { 2012 shrink_dcache_parent(dentry); 2013 d_drop(dentry); 2014 dput(dentry); 2015 } 2016 2017 dput(dir); 2018 out_put_leader: 2019 dput(leader); 2020 out: 2021 return; 2022 } 2023 2024 /* SMP-safe */ 2025 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2026 { 2027 struct dentry *result = ERR_PTR(-ENOENT); 2028 struct task_struct *task; 2029 struct inode *inode; 2030 struct proc_inode *ei; 2031 unsigned tgid; 2032 2033 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 2034 inode = new_inode(dir->i_sb); 2035 if (!inode) 2036 return ERR_PTR(-ENOMEM); 2037 ei = PROC_I(inode); 2038 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2039 inode->i_ino = fake_ino(0, PROC_TGID_INO); 2040 ei->pde = NULL; 2041 inode->i_mode = S_IFLNK|S_IRWXUGO; 2042 inode->i_uid = inode->i_gid = 0; 2043 inode->i_size = 64; 2044 inode->i_op = &proc_self_inode_operations; 2045 d_add(dentry, inode); 2046 return NULL; 2047 } 2048 tgid = name_to_int(dentry); 2049 if (tgid == ~0U) 2050 
goto out; 2051 2052 rcu_read_lock(); 2053 task = find_task_by_pid(tgid); 2054 if (task) 2055 get_task_struct(task); 2056 rcu_read_unlock(); 2057 if (!task) 2058 goto out; 2059 2060 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); 2061 if (!inode) 2062 goto out_put_task; 2063 2064 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2065 inode->i_op = &proc_tgid_base_inode_operations; 2066 inode->i_fop = &proc_tgid_base_operations; 2067 inode->i_flags|=S_IMMUTABLE; 2068 #ifdef CONFIG_SECURITY 2069 inode->i_nlink = 5; 2070 #else 2071 inode->i_nlink = 4; 2072 #endif 2073 2074 dentry->d_op = &pid_dentry_operations; 2075 2076 d_add(dentry, inode); 2077 /* Close the race of the process dying before we return the dentry */ 2078 if (pid_revalidate(dentry, NULL)) 2079 result = NULL; 2080 2081 out_put_task: 2082 put_task_struct(task); 2083 out: 2084 return result; 2085 } 2086 2087 /* SMP-safe */ 2088 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2089 { 2090 struct dentry *result = ERR_PTR(-ENOENT); 2091 struct task_struct *task; 2092 struct task_struct *leader = get_proc_task(dir); 2093 struct inode *inode; 2094 unsigned tid; 2095 2096 if (!leader) 2097 goto out_no_task; 2098 2099 tid = name_to_int(dentry); 2100 if (tid == ~0U) 2101 goto out; 2102 2103 rcu_read_lock(); 2104 task = find_task_by_pid(tid); 2105 if (task) 2106 get_task_struct(task); 2107 rcu_read_unlock(); 2108 if (!task) 2109 goto out; 2110 if (leader->tgid != task->tgid) 2111 goto out_drop_task; 2112 2113 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); 2114 2115 2116 if (!inode) 2117 goto out_drop_task; 2118 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2119 inode->i_op = &proc_tid_base_inode_operations; 2120 inode->i_fop = &proc_tid_base_operations; 2121 inode->i_flags|=S_IMMUTABLE; 2122 #ifdef CONFIG_SECURITY 2123 inode->i_nlink = 4; 2124 #else 2125 inode->i_nlink = 3; 2126 #endif 2127 2128 dentry->d_op = &pid_dentry_operations; 2129 2130 
d_add(dentry, inode); 2131 /* Close the race of the process dying before we return the dentry */ 2132 if (pid_revalidate(dentry, NULL)) 2133 result = NULL; 2134 2135 out_drop_task: 2136 put_task_struct(task); 2137 out: 2138 put_task_struct(leader); 2139 out_no_task: 2140 return result; 2141 } 2142 2143 /* 2144 * Find the first tgid to return to user space. 2145 * 2146 * Usually this is just whatever follows &init_task, but if the users 2147 * buffer was too small to hold the full list or there was a seek into 2148 * the middle of the directory we have more work to do. 2149 * 2150 * In the case of a short read we start with find_task_by_pid. 2151 * 2152 * In the case of a seek we start with &init_task and walk nr 2153 * threads past it. 2154 */ 2155 static struct task_struct *first_tgid(int tgid, unsigned int nr) 2156 { 2157 struct task_struct *pos; 2158 rcu_read_lock(); 2159 if (tgid && nr) { 2160 pos = find_task_by_pid(tgid); 2161 if (pos && thread_group_leader(pos)) 2162 goto found; 2163 } 2164 /* If nr exceeds the number of processes get out quickly */ 2165 pos = NULL; 2166 if (nr && nr >= nr_processes()) 2167 goto done; 2168 2169 /* If we haven't found our starting place yet start with 2170 * the init_task and walk nr tasks forward. 2171 */ 2172 for (pos = next_task(&init_task); nr > 0; --nr) { 2173 pos = next_task(pos); 2174 if (pos == &init_task) { 2175 pos = NULL; 2176 goto done; 2177 } 2178 } 2179 found: 2180 get_task_struct(pos); 2181 done: 2182 rcu_read_unlock(); 2183 return pos; 2184 } 2185 2186 /* 2187 * Find the next task in the task list. 2188 * Return NULL if we loop or there is any error. 2189 * 2190 * The reference to the input task_struct is released. 
2191 */ 2192 static struct task_struct *next_tgid(struct task_struct *start) 2193 { 2194 struct task_struct *pos; 2195 rcu_read_lock(); 2196 pos = start; 2197 if (pid_alive(start)) 2198 pos = next_task(start); 2199 if (pid_alive(pos) && (pos != &init_task)) { 2200 get_task_struct(pos); 2201 goto done; 2202 } 2203 pos = NULL; 2204 done: 2205 rcu_read_unlock(); 2206 put_task_struct(start); 2207 return pos; 2208 } 2209 2210 /* for the /proc/ directory itself, after non-process stuff has been done */ 2211 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2212 { 2213 char buf[PROC_NUMBUF]; 2214 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2215 struct task_struct *task; 2216 int tgid; 2217 2218 if (!nr) { 2219 ino_t ino = fake_ino(0,PROC_TGID_INO); 2220 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) 2221 return 0; 2222 filp->f_pos++; 2223 nr++; 2224 } 2225 nr -= 1; 2226 2227 /* f_version caches the tgid value that the last readdir call couldn't 2228 * return. lseek aka telldir automagically resets f_version to 0. 2229 */ 2230 tgid = filp->f_version; 2231 filp->f_version = 0; 2232 for (task = first_tgid(tgid, nr); 2233 task; 2234 task = next_tgid(task), filp->f_pos++) { 2235 int len; 2236 ino_t ino; 2237 tgid = task->pid; 2238 len = snprintf(buf, sizeof(buf), "%d", tgid); 2239 ino = fake_ino(tgid, PROC_TGID_INO); 2240 if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) { 2241 /* returning this tgid failed, save it as the first 2242 * pid for the next readir call */ 2243 filp->f_version = tgid; 2244 put_task_struct(task); 2245 break; 2246 } 2247 } 2248 return 0; 2249 } 2250 2251 /* 2252 * Find the first tid of a thread group to return to user space. 2253 * 2254 * Usually this is just the thread group leader, but if the users 2255 * buffer was too small or there was a seek into the middle of the 2256 * directory we have more work todo. 2257 * 2258 * In the case of a short read we start with find_task_by_pid. 
2259 * 2260 * In the case of a seek we start with the leader and walk nr 2261 * threads past it. 2262 */ 2263 static struct task_struct *first_tid(struct task_struct *leader, 2264 int tid, int nr) 2265 { 2266 struct task_struct *pos; 2267 2268 rcu_read_lock(); 2269 /* Attempt to start with the pid of a thread */ 2270 if (tid && (nr > 0)) { 2271 pos = find_task_by_pid(tid); 2272 if (pos && (pos->group_leader == leader)) 2273 goto found; 2274 } 2275 2276 /* If nr exceeds the number of threads there is nothing todo */ 2277 pos = NULL; 2278 if (nr && nr >= get_nr_threads(leader)) 2279 goto out; 2280 2281 /* If we haven't found our starting place yet start 2282 * with the leader and walk nr threads forward. 2283 */ 2284 for (pos = leader; nr > 0; --nr) { 2285 pos = next_thread(pos); 2286 if (pos == leader) { 2287 pos = NULL; 2288 goto out; 2289 } 2290 } 2291 found: 2292 get_task_struct(pos); 2293 out: 2294 rcu_read_unlock(); 2295 return pos; 2296 } 2297 2298 /* 2299 * Find the next thread in the thread list. 2300 * Return NULL if there is an error or no next thread. 2301 * 2302 * The reference to the input task_struct is released. 
2303 */ 2304 static struct task_struct *next_tid(struct task_struct *start) 2305 { 2306 struct task_struct *pos = NULL; 2307 rcu_read_lock(); 2308 if (pid_alive(start)) { 2309 pos = next_thread(start); 2310 if (thread_group_leader(pos)) 2311 pos = NULL; 2312 else 2313 get_task_struct(pos); 2314 } 2315 rcu_read_unlock(); 2316 put_task_struct(start); 2317 return pos; 2318 } 2319 2320 /* for the /proc/TGID/task/ directories */ 2321 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2322 { 2323 char buf[PROC_NUMBUF]; 2324 struct dentry *dentry = filp->f_dentry; 2325 struct inode *inode = dentry->d_inode; 2326 struct task_struct *leader = get_proc_task(inode); 2327 struct task_struct *task; 2328 int retval = -ENOENT; 2329 ino_t ino; 2330 int tid; 2331 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2332 2333 if (!leader) 2334 goto out_no_task; 2335 retval = 0; 2336 2337 switch (pos) { 2338 case 0: 2339 ino = inode->i_ino; 2340 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) 2341 goto out; 2342 pos++; 2343 /* fall through */ 2344 case 1: 2345 ino = parent_ino(dentry); 2346 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) 2347 goto out; 2348 pos++; 2349 /* fall through */ 2350 } 2351 2352 /* f_version caches the tgid value that the last readdir call couldn't 2353 * return. lseek aka telldir automagically resets f_version to 0. 
2354 */ 2355 tid = filp->f_version; 2356 filp->f_version = 0; 2357 for (task = first_tid(leader, tid, pos - 2); 2358 task; 2359 task = next_tid(task), pos++) { 2360 int len; 2361 tid = task->pid; 2362 len = snprintf(buf, sizeof(buf), "%d", tid); 2363 ino = fake_ino(tid, PROC_TID_INO); 2364 if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) { 2365 /* returning this tgid failed, save it as the first 2366 * pid for the next readir call */ 2367 filp->f_version = tid; 2368 put_task_struct(task); 2369 break; 2370 } 2371 } 2372 out: 2373 filp->f_pos = pos; 2374 put_task_struct(leader); 2375 out_no_task: 2376 return retval; 2377 } 2378 2379 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 2380 { 2381 struct inode *inode = dentry->d_inode; 2382 struct task_struct *p = get_proc_task(inode); 2383 generic_fillattr(inode, stat); 2384 2385 if (p) { 2386 rcu_read_lock(); 2387 stat->nlink += get_nr_threads(p); 2388 rcu_read_unlock(); 2389 put_task_struct(p); 2390 } 2391 2392 return 0; 2393 } 2394