/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem of its own. We don't use the
 *  rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
 */

#include <asm/uaccess.h>

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
#include <linux/namespace.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
#include <linux/poll.h>
#include "internal.h"

/* NOTE:
 *	Implementing inode permission operations in /proc is almost
 *	certainly an error.  Permission checks need to happen during
 *	each system call, not at open time.  The reason is that most of
 *	what we wish to check for permissions in /proc varies at runtime.
 *
 *	The classic example of a problem is opening file descriptors
 *	in /proc for a task before it execs a suid executable.
 */

/*
 * For hysterical raisins we keep the same inumbers as in the old procfs.
 * Feel free to change the macro below - just keep the range distinct from
 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
 * As soon as we get a separate superblock we will be able to forget
 * about magical ranges too.
93 */ 94 95 #define fake_ino(pid,ino) (((pid)<<16)|(ino)) 96 97 enum pid_directory_inos { 98 PROC_TGID_INO = 2, 99 PROC_TGID_TASK, 100 PROC_TGID_STATUS, 101 PROC_TGID_MEM, 102 #ifdef CONFIG_SECCOMP 103 PROC_TGID_SECCOMP, 104 #endif 105 PROC_TGID_CWD, 106 PROC_TGID_ROOT, 107 PROC_TGID_EXE, 108 PROC_TGID_FD, 109 PROC_TGID_ENVIRON, 110 PROC_TGID_AUXV, 111 PROC_TGID_CMDLINE, 112 PROC_TGID_STAT, 113 PROC_TGID_STATM, 114 PROC_TGID_MAPS, 115 PROC_TGID_NUMA_MAPS, 116 PROC_TGID_MOUNTS, 117 PROC_TGID_MOUNTSTATS, 118 PROC_TGID_WCHAN, 119 #ifdef CONFIG_MMU 120 PROC_TGID_SMAPS, 121 #endif 122 #ifdef CONFIG_SCHEDSTATS 123 PROC_TGID_SCHEDSTAT, 124 #endif 125 #ifdef CONFIG_CPUSETS 126 PROC_TGID_CPUSET, 127 #endif 128 #ifdef CONFIG_SECURITY 129 PROC_TGID_ATTR, 130 PROC_TGID_ATTR_CURRENT, 131 PROC_TGID_ATTR_PREV, 132 PROC_TGID_ATTR_EXEC, 133 PROC_TGID_ATTR_FSCREATE, 134 PROC_TGID_ATTR_KEYCREATE, 135 PROC_TGID_ATTR_SOCKCREATE, 136 #endif 137 #ifdef CONFIG_AUDITSYSCALL 138 PROC_TGID_LOGINUID, 139 #endif 140 PROC_TGID_OOM_SCORE, 141 PROC_TGID_OOM_ADJUST, 142 PROC_TID_INO, 143 PROC_TID_STATUS, 144 PROC_TID_MEM, 145 #ifdef CONFIG_SECCOMP 146 PROC_TID_SECCOMP, 147 #endif 148 PROC_TID_CWD, 149 PROC_TID_ROOT, 150 PROC_TID_EXE, 151 PROC_TID_FD, 152 PROC_TID_ENVIRON, 153 PROC_TID_AUXV, 154 PROC_TID_CMDLINE, 155 PROC_TID_STAT, 156 PROC_TID_STATM, 157 PROC_TID_MAPS, 158 PROC_TID_NUMA_MAPS, 159 PROC_TID_MOUNTS, 160 PROC_TID_MOUNTSTATS, 161 PROC_TID_WCHAN, 162 #ifdef CONFIG_MMU 163 PROC_TID_SMAPS, 164 #endif 165 #ifdef CONFIG_SCHEDSTATS 166 PROC_TID_SCHEDSTAT, 167 #endif 168 #ifdef CONFIG_CPUSETS 169 PROC_TID_CPUSET, 170 #endif 171 #ifdef CONFIG_SECURITY 172 PROC_TID_ATTR, 173 PROC_TID_ATTR_CURRENT, 174 PROC_TID_ATTR_PREV, 175 PROC_TID_ATTR_EXEC, 176 PROC_TID_ATTR_FSCREATE, 177 PROC_TID_ATTR_KEYCREATE, 178 PROC_TID_ATTR_SOCKCREATE, 179 #endif 180 #ifdef CONFIG_AUDITSYSCALL 181 PROC_TID_LOGINUID, 182 #endif 183 PROC_TID_OOM_SCORE, 184 PROC_TID_OOM_ADJUST, 185 186 /* Add new entries before this */ 
187 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 188 }; 189 190 /* Worst case buffer size needed for holding an integer. */ 191 #define PROC_NUMBUF 10 192 193 struct pid_entry { 194 int type; 195 int len; 196 char *name; 197 mode_t mode; 198 }; 199 200 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} 201 202 static struct pid_entry tgid_base_stuff[] = { 203 E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), 204 E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 205 E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), 206 E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), 207 E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), 208 E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 209 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 210 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 211 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 212 #ifdef CONFIG_NUMA 213 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 214 #endif 215 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 216 #ifdef CONFIG_SECCOMP 217 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 218 #endif 219 E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 220 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 221 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 222 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 223 E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR), 224 #ifdef CONFIG_MMU 225 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), 226 #endif 227 #ifdef CONFIG_SECURITY 228 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 229 #endif 230 #ifdef CONFIG_KALLSYMS 231 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), 232 #endif 233 #ifdef CONFIG_SCHEDSTATS 234 E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), 235 #endif 236 #ifdef CONFIG_CPUSETS 237 E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 238 #endif 239 E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 240 E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 241 #ifdef CONFIG_AUDITSYSCALL 242 E(PROC_TGID_LOGINUID, "loginuid", 
S_IFREG|S_IWUSR|S_IRUGO), 243 #endif 244 {0,0,NULL,0} 245 }; 246 static struct pid_entry tid_base_stuff[] = { 247 E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 248 E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), 249 E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), 250 E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), 251 E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 252 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 253 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), 254 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 255 #ifdef CONFIG_NUMA 256 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 257 #endif 258 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 259 #ifdef CONFIG_SECCOMP 260 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 261 #endif 262 E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 263 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), 264 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), 265 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 266 #ifdef CONFIG_MMU 267 E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), 268 #endif 269 #ifdef CONFIG_SECURITY 270 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 271 #endif 272 #ifdef CONFIG_KALLSYMS 273 E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), 274 #endif 275 #ifdef CONFIG_SCHEDSTATS 276 E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), 277 #endif 278 #ifdef CONFIG_CPUSETS 279 E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 280 #endif 281 E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 282 E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 283 #ifdef CONFIG_AUDITSYSCALL 284 E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 285 #endif 286 {0,0,NULL,0} 287 }; 288 289 #ifdef CONFIG_SECURITY 290 static struct pid_entry tgid_attr_stuff[] = { 291 E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 292 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 293 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 294 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 295 
E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), 296 E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), 297 {0,0,NULL,0} 298 }; 299 static struct pid_entry tid_attr_stuff[] = { 300 E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 301 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 302 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 303 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 304 E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), 305 E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), 306 {0,0,NULL,0} 307 }; 308 #endif 309 310 #undef E 311 312 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 313 { 314 struct task_struct *task = get_proc_task(inode); 315 struct files_struct *files = NULL; 316 struct file *file; 317 int fd = proc_fd(inode); 318 319 if (task) { 320 files = get_files_struct(task); 321 put_task_struct(task); 322 } 323 if (files) { 324 /* 325 * We are not taking a ref to the file structure, so we must 326 * hold ->file_lock. 
327 */ 328 spin_lock(&files->file_lock); 329 file = fcheck_files(files, fd); 330 if (file) { 331 *mnt = mntget(file->f_vfsmnt); 332 *dentry = dget(file->f_dentry); 333 spin_unlock(&files->file_lock); 334 put_files_struct(files); 335 return 0; 336 } 337 spin_unlock(&files->file_lock); 338 put_files_struct(files); 339 } 340 return -ENOENT; 341 } 342 343 static struct fs_struct *get_fs_struct(struct task_struct *task) 344 { 345 struct fs_struct *fs; 346 task_lock(task); 347 fs = task->fs; 348 if(fs) 349 atomic_inc(&fs->count); 350 task_unlock(task); 351 return fs; 352 } 353 354 static int get_nr_threads(struct task_struct *tsk) 355 { 356 /* Must be called with the rcu_read_lock held */ 357 unsigned long flags; 358 int count = 0; 359 360 if (lock_task_sighand(tsk, &flags)) { 361 count = atomic_read(&tsk->signal->count); 362 unlock_task_sighand(tsk, &flags); 363 } 364 return count; 365 } 366 367 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 368 { 369 struct task_struct *task = get_proc_task(inode); 370 struct fs_struct *fs = NULL; 371 int result = -ENOENT; 372 373 if (task) { 374 fs = get_fs_struct(task); 375 put_task_struct(task); 376 } 377 if (fs) { 378 read_lock(&fs->lock); 379 *mnt = mntget(fs->pwdmnt); 380 *dentry = dget(fs->pwd); 381 read_unlock(&fs->lock); 382 result = 0; 383 put_fs_struct(fs); 384 } 385 return result; 386 } 387 388 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 389 { 390 struct task_struct *task = get_proc_task(inode); 391 struct fs_struct *fs = NULL; 392 int result = -ENOENT; 393 394 if (task) { 395 fs = get_fs_struct(task); 396 put_task_struct(task); 397 } 398 if (fs) { 399 read_lock(&fs->lock); 400 *mnt = mntget(fs->rootmnt); 401 *dentry = dget(fs->root); 402 read_unlock(&fs->lock); 403 result = 0; 404 put_fs_struct(fs); 405 } 406 return result; 407 } 408 409 #define MAY_PTRACE(task) \ 410 (task == current || \ 411 (task->parent == current && \ 412 
(task->ptrace & PT_PTRACED) && \ 413 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ 414 security_ptrace(current,task) == 0)) 415 416 static int proc_pid_environ(struct task_struct *task, char * buffer) 417 { 418 int res = 0; 419 struct mm_struct *mm = get_task_mm(task); 420 if (mm) { 421 unsigned int len = mm->env_end - mm->env_start; 422 if (len > PAGE_SIZE) 423 len = PAGE_SIZE; 424 res = access_process_vm(task, mm->env_start, buffer, len, 0); 425 if (!ptrace_may_attach(task)) 426 res = -ESRCH; 427 mmput(mm); 428 } 429 return res; 430 } 431 432 static int proc_pid_cmdline(struct task_struct *task, char * buffer) 433 { 434 int res = 0; 435 unsigned int len; 436 struct mm_struct *mm = get_task_mm(task); 437 if (!mm) 438 goto out; 439 if (!mm->arg_end) 440 goto out_mm; /* Shh! No looking before we're done */ 441 442 len = mm->arg_end - mm->arg_start; 443 444 if (len > PAGE_SIZE) 445 len = PAGE_SIZE; 446 447 res = access_process_vm(task, mm->arg_start, buffer, len, 0); 448 449 // If the nul at the end of args has been overwritten, then 450 // assume application is using setproctitle(3). 
451 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { 452 len = strnlen(buffer, res); 453 if (len < res) { 454 res = len; 455 } else { 456 len = mm->env_end - mm->env_start; 457 if (len > PAGE_SIZE - res) 458 len = PAGE_SIZE - res; 459 res += access_process_vm(task, mm->env_start, buffer+res, len, 0); 460 res = strnlen(buffer, res); 461 } 462 } 463 out_mm: 464 mmput(mm); 465 out: 466 return res; 467 } 468 469 static int proc_pid_auxv(struct task_struct *task, char *buffer) 470 { 471 int res = 0; 472 struct mm_struct *mm = get_task_mm(task); 473 if (mm) { 474 unsigned int nwords = 0; 475 do 476 nwords += 2; 477 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 478 res = nwords * sizeof(mm->saved_auxv[0]); 479 if (res > PAGE_SIZE) 480 res = PAGE_SIZE; 481 memcpy(buffer, mm->saved_auxv, res); 482 mmput(mm); 483 } 484 return res; 485 } 486 487 488 #ifdef CONFIG_KALLSYMS 489 /* 490 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 491 * Returns the resolved symbol. If that fails, simply return the address. 
492 */ 493 static int proc_pid_wchan(struct task_struct *task, char *buffer) 494 { 495 char *modname; 496 const char *sym_name; 497 unsigned long wchan, size, offset; 498 char namebuf[KSYM_NAME_LEN+1]; 499 500 wchan = get_wchan(task); 501 502 sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); 503 if (sym_name) 504 return sprintf(buffer, "%s", sym_name); 505 return sprintf(buffer, "%lu", wchan); 506 } 507 #endif /* CONFIG_KALLSYMS */ 508 509 #ifdef CONFIG_SCHEDSTATS 510 /* 511 * Provides /proc/PID/schedstat 512 */ 513 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 514 { 515 return sprintf(buffer, "%lu %lu %lu\n", 516 task->sched_info.cpu_time, 517 task->sched_info.run_delay, 518 task->sched_info.pcnt); 519 } 520 #endif 521 522 /* The badness from the OOM killer */ 523 unsigned long badness(struct task_struct *p, unsigned long uptime); 524 static int proc_oom_score(struct task_struct *task, char *buffer) 525 { 526 unsigned long points; 527 struct timespec uptime; 528 529 do_posix_clock_monotonic_gettime(&uptime); 530 points = badness(task, uptime.tv_sec); 531 return sprintf(buffer, "%lu\n", points); 532 } 533 534 /************************************************************************/ 535 /* Here the fs part begins */ 536 /************************************************************************/ 537 538 /* permission checks */ 539 static int proc_fd_access_allowed(struct inode *inode) 540 { 541 struct task_struct *task; 542 int allowed = 0; 543 /* Allow access to a task's file descriptors if it is us or we 544 * may use ptrace attach to the process and find out that 545 * information. 
546 */ 547 task = get_proc_task(inode); 548 if (task) { 549 allowed = ptrace_may_attach(task); 550 put_task_struct(task); 551 } 552 return allowed; 553 } 554 555 extern struct seq_operations mounts_op; 556 struct proc_mounts { 557 struct seq_file m; 558 int event; 559 }; 560 561 static int mounts_open(struct inode *inode, struct file *file) 562 { 563 struct task_struct *task = get_proc_task(inode); 564 struct namespace *namespace = NULL; 565 struct proc_mounts *p; 566 int ret = -EINVAL; 567 568 if (task) { 569 task_lock(task); 570 namespace = task->namespace; 571 if (namespace) 572 get_namespace(namespace); 573 task_unlock(task); 574 put_task_struct(task); 575 } 576 577 if (namespace) { 578 ret = -ENOMEM; 579 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); 580 if (p) { 581 file->private_data = &p->m; 582 ret = seq_open(file, &mounts_op); 583 if (!ret) { 584 p->m.private = namespace; 585 p->event = namespace->event; 586 return 0; 587 } 588 kfree(p); 589 } 590 put_namespace(namespace); 591 } 592 return ret; 593 } 594 595 static int mounts_release(struct inode *inode, struct file *file) 596 { 597 struct seq_file *m = file->private_data; 598 struct namespace *namespace = m->private; 599 put_namespace(namespace); 600 return seq_release(inode, file); 601 } 602 603 static unsigned mounts_poll(struct file *file, poll_table *wait) 604 { 605 struct proc_mounts *p = file->private_data; 606 struct namespace *ns = p->m.private; 607 unsigned res = 0; 608 609 poll_wait(file, &ns->poll, wait); 610 611 spin_lock(&vfsmount_lock); 612 if (p->event != ns->event) { 613 p->event = ns->event; 614 res = POLLERR; 615 } 616 spin_unlock(&vfsmount_lock); 617 618 return res; 619 } 620 621 static struct file_operations proc_mounts_operations = { 622 .open = mounts_open, 623 .read = seq_read, 624 .llseek = seq_lseek, 625 .release = mounts_release, 626 .poll = mounts_poll, 627 }; 628 629 extern struct seq_operations mountstats_op; 630 static int mountstats_open(struct inode *inode, struct 
file *file) 631 { 632 int ret = seq_open(file, &mountstats_op); 633 634 if (!ret) { 635 struct seq_file *m = file->private_data; 636 struct namespace *namespace = NULL; 637 struct task_struct *task = get_proc_task(inode); 638 639 if (task) { 640 task_lock(task); 641 namespace = task->namespace; 642 if (namespace) 643 get_namespace(namespace); 644 task_unlock(task); 645 put_task_struct(task); 646 } 647 648 if (namespace) 649 m->private = namespace; 650 else { 651 seq_release(inode, file); 652 ret = -EINVAL; 653 } 654 } 655 return ret; 656 } 657 658 static struct file_operations proc_mountstats_operations = { 659 .open = mountstats_open, 660 .read = seq_read, 661 .llseek = seq_lseek, 662 .release = mounts_release, 663 }; 664 665 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 666 667 static ssize_t proc_info_read(struct file * file, char __user * buf, 668 size_t count, loff_t *ppos) 669 { 670 struct inode * inode = file->f_dentry->d_inode; 671 unsigned long page; 672 ssize_t length; 673 struct task_struct *task = get_proc_task(inode); 674 675 length = -ESRCH; 676 if (!task) 677 goto out_no_task; 678 679 if (count > PROC_BLOCK_SIZE) 680 count = PROC_BLOCK_SIZE; 681 682 length = -ENOMEM; 683 if (!(page = __get_free_page(GFP_KERNEL))) 684 goto out; 685 686 length = PROC_I(inode)->op.proc_read(task, (char*)page); 687 688 if (length >= 0) 689 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 690 free_page(page); 691 out: 692 put_task_struct(task); 693 out_no_task: 694 return length; 695 } 696 697 static struct file_operations proc_info_file_operations = { 698 .read = proc_info_read, 699 }; 700 701 static int mem_open(struct inode* inode, struct file* file) 702 { 703 file->private_data = (void*)((long)current->self_exec_id); 704 return 0; 705 } 706 707 static ssize_t mem_read(struct file * file, char __user * buf, 708 size_t count, loff_t *ppos) 709 { 710 struct task_struct *task = 
get_proc_task(file->f_dentry->d_inode); 711 char *page; 712 unsigned long src = *ppos; 713 int ret = -ESRCH; 714 struct mm_struct *mm; 715 716 if (!task) 717 goto out_no_task; 718 719 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 720 goto out; 721 722 ret = -ENOMEM; 723 page = (char *)__get_free_page(GFP_USER); 724 if (!page) 725 goto out; 726 727 ret = 0; 728 729 mm = get_task_mm(task); 730 if (!mm) 731 goto out_free; 732 733 ret = -EIO; 734 735 if (file->private_data != (void*)((long)current->self_exec_id)) 736 goto out_put; 737 738 ret = 0; 739 740 while (count > 0) { 741 int this_len, retval; 742 743 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 744 retval = access_process_vm(task, src, page, this_len, 0); 745 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 746 if (!ret) 747 ret = -EIO; 748 break; 749 } 750 751 if (copy_to_user(buf, page, retval)) { 752 ret = -EFAULT; 753 break; 754 } 755 756 ret += retval; 757 src += retval; 758 buf += retval; 759 count -= retval; 760 } 761 *ppos = src; 762 763 out_put: 764 mmput(mm); 765 out_free: 766 free_page((unsigned long) page); 767 out: 768 put_task_struct(task); 769 out_no_task: 770 return ret; 771 } 772 773 #define mem_write NULL 774 775 #ifndef mem_write 776 /* This is a security hazard */ 777 static ssize_t mem_write(struct file * file, const char * buf, 778 size_t count, loff_t *ppos) 779 { 780 int copied = 0; 781 char *page; 782 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 783 unsigned long dst = *ppos; 784 785 copied = -ESRCH; 786 if (!task) 787 goto out_no_task; 788 789 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 790 goto out; 791 792 copied = -ENOMEM; 793 page = (char *)__get_free_page(GFP_USER); 794 if (!page) 795 goto out; 796 797 while (count > 0) { 798 int this_len, retval; 799 800 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 801 if (copy_from_user(page, buf, this_len)) { 802 copied = -EFAULT; 803 break; 804 } 805 retval = access_process_vm(task, dst, page, this_len, 1); 806 if (!retval) { 807 if (!copied) 808 copied = -EIO; 809 break; 810 } 811 copied += retval; 812 buf += retval; 813 dst += retval; 814 count -= retval; 815 } 816 *ppos = dst; 817 free_page((unsigned long) page); 818 out: 819 put_task_struct(task); 820 out_no_task: 821 return copied; 822 } 823 #endif 824 825 static loff_t mem_lseek(struct file * file, loff_t offset, int orig) 826 { 827 switch (orig) { 828 case 0: 829 file->f_pos = offset; 830 break; 831 case 1: 832 file->f_pos += offset; 833 break; 834 default: 835 return -EINVAL; 836 } 837 force_successful_syscall_return(); 838 return file->f_pos; 839 } 840 841 static struct file_operations proc_mem_operations = { 842 .llseek = mem_lseek, 843 .read = mem_read, 844 .write = mem_write, 845 .open = mem_open, 846 }; 847 848 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 849 size_t count, loff_t *ppos) 850 { 851 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 852 char buffer[PROC_NUMBUF]; 853 size_t len; 854 int oom_adjust; 855 loff_t __ppos = *ppos; 856 857 if (!task) 858 return -ESRCH; 859 oom_adjust = task->oomkilladj; 860 put_task_struct(task); 861 862 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 863 if (__ppos >= len) 864 return 0; 865 if (count > len-__ppos) 866 count = len-__ppos; 867 if (copy_to_user(buf, buffer + __ppos, count)) 868 return -EFAULT; 869 *ppos = __ppos + count; 870 return count; 871 } 872 873 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 874 size_t count, loff_t *ppos) 875 { 876 struct task_struct *task; 877 char buffer[PROC_NUMBUF], *end; 878 int oom_adjust; 879 880 if (!capable(CAP_SYS_RESOURCE)) 881 return -EPERM; 882 memset(buffer, 0, sizeof(buffer)); 883 if (count > sizeof(buffer) - 1) 884 count = sizeof(buffer) - 1; 885 if 
(copy_from_user(buffer, buf, count)) 886 return -EFAULT; 887 oom_adjust = simple_strtol(buffer, &end, 0); 888 if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE) 889 return -EINVAL; 890 if (*end == '\n') 891 end++; 892 task = get_proc_task(file->f_dentry->d_inode); 893 if (!task) 894 return -ESRCH; 895 task->oomkilladj = oom_adjust; 896 put_task_struct(task); 897 if (end - buffer == 0) 898 return -EIO; 899 return end - buffer; 900 } 901 902 static struct file_operations proc_oom_adjust_operations = { 903 .read = oom_adjust_read, 904 .write = oom_adjust_write, 905 }; 906 907 #ifdef CONFIG_AUDITSYSCALL 908 #define TMPBUFLEN 21 909 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 910 size_t count, loff_t *ppos) 911 { 912 struct inode * inode = file->f_dentry->d_inode; 913 struct task_struct *task = get_proc_task(inode); 914 ssize_t length; 915 char tmpbuf[TMPBUFLEN]; 916 917 if (!task) 918 return -ESRCH; 919 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 920 audit_get_loginuid(task->audit_context)); 921 put_task_struct(task); 922 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 923 } 924 925 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 926 size_t count, loff_t *ppos) 927 { 928 struct inode * inode = file->f_dentry->d_inode; 929 char *page, *tmp; 930 ssize_t length; 931 uid_t loginuid; 932 933 if (!capable(CAP_AUDIT_CONTROL)) 934 return -EPERM; 935 936 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) 937 return -EPERM; 938 939 if (count >= PAGE_SIZE) 940 count = PAGE_SIZE - 1; 941 942 if (*ppos != 0) { 943 /* No partial writes. 
*/ 944 return -EINVAL; 945 } 946 page = (char*)__get_free_page(GFP_USER); 947 if (!page) 948 return -ENOMEM; 949 length = -EFAULT; 950 if (copy_from_user(page, buf, count)) 951 goto out_free_page; 952 953 page[count] = '\0'; 954 loginuid = simple_strtoul(page, &tmp, 10); 955 if (tmp == page) { 956 length = -EINVAL; 957 goto out_free_page; 958 959 } 960 length = audit_set_loginuid(current, loginuid); 961 if (likely(length == 0)) 962 length = count; 963 964 out_free_page: 965 free_page((unsigned long) page); 966 return length; 967 } 968 969 static struct file_operations proc_loginuid_operations = { 970 .read = proc_loginuid_read, 971 .write = proc_loginuid_write, 972 }; 973 #endif 974 975 #ifdef CONFIG_SECCOMP 976 static ssize_t seccomp_read(struct file *file, char __user *buf, 977 size_t count, loff_t *ppos) 978 { 979 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); 980 char __buf[20]; 981 loff_t __ppos = *ppos; 982 size_t len; 983 984 if (!tsk) 985 return -ESRCH; 986 /* no need to print the trailing zero, so use only len */ 987 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 988 put_task_struct(tsk); 989 if (__ppos >= len) 990 return 0; 991 if (count > len - __ppos) 992 count = len - __ppos; 993 if (copy_to_user(buf, __buf + __ppos, count)) 994 return -EFAULT; 995 *ppos = __ppos + count; 996 return count; 997 } 998 999 static ssize_t seccomp_write(struct file *file, const char __user *buf, 1000 size_t count, loff_t *ppos) 1001 { 1002 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); 1003 char __buf[20], *end; 1004 unsigned int seccomp_mode; 1005 ssize_t result; 1006 1007 result = -ESRCH; 1008 if (!tsk) 1009 goto out_no_task; 1010 1011 /* can set it only once to be even more secure */ 1012 result = -EPERM; 1013 if (unlikely(tsk->seccomp.mode)) 1014 goto out; 1015 1016 result = -EFAULT; 1017 memset(__buf, 0, sizeof(__buf)); 1018 count = min(count, sizeof(__buf) - 1); 1019 if (copy_from_user(__buf, buf, count)) 1020 goto out; 1021 
1022 seccomp_mode = simple_strtoul(__buf, &end, 0); 1023 if (*end == '\n') 1024 end++; 1025 result = -EINVAL; 1026 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1027 tsk->seccomp.mode = seccomp_mode; 1028 set_tsk_thread_flag(tsk, TIF_SECCOMP); 1029 } else 1030 goto out; 1031 result = -EIO; 1032 if (unlikely(!(end - __buf))) 1033 goto out; 1034 result = end - __buf; 1035 out: 1036 put_task_struct(tsk); 1037 out_no_task: 1038 return result; 1039 } 1040 1041 static struct file_operations proc_seccomp_operations = { 1042 .read = seccomp_read, 1043 .write = seccomp_write, 1044 }; 1045 #endif /* CONFIG_SECCOMP */ 1046 1047 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1048 { 1049 struct inode *inode = dentry->d_inode; 1050 int error = -EACCES; 1051 1052 /* We don't need a base pointer in the /proc filesystem */ 1053 path_release(nd); 1054 1055 /* Are we allowed to snoop on the tasks file descriptors? */ 1056 if (!proc_fd_access_allowed(inode)) 1057 goto out; 1058 1059 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1060 nd->last_type = LAST_BIND; 1061 out: 1062 return ERR_PTR(error); 1063 } 1064 1065 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, 1066 char __user *buffer, int buflen) 1067 { 1068 struct inode * inode; 1069 char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; 1070 int len; 1071 1072 if (!tmp) 1073 return -ENOMEM; 1074 1075 inode = dentry->d_inode; 1076 path = d_path(dentry, mnt, tmp, PAGE_SIZE); 1077 len = PTR_ERR(path); 1078 if (IS_ERR(path)) 1079 goto out; 1080 len = tmp + PAGE_SIZE - 1 - path; 1081 1082 if (len > buflen) 1083 len = buflen; 1084 if (copy_to_user(buffer, path, len)) 1085 len = -EFAULT; 1086 out: 1087 free_page((unsigned long)tmp); 1088 return len; 1089 } 1090 1091 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1092 { 1093 int error = -EACCES; 1094 struct inode *inode = dentry->d_inode; 1095 struct dentry 
*de; 1096 struct vfsmount *mnt = NULL; 1097 1098 /* Are we allowed to snoop on the tasks file descriptors? */ 1099 if (!proc_fd_access_allowed(inode)) 1100 goto out; 1101 1102 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1103 if (error) 1104 goto out; 1105 1106 error = do_proc_readlink(de, mnt, buffer, buflen); 1107 dput(de); 1108 mntput(mnt); 1109 out: 1110 return error; 1111 } 1112 1113 static struct inode_operations proc_pid_link_inode_operations = { 1114 .readlink = proc_pid_readlink, 1115 .follow_link = proc_pid_follow_link 1116 }; 1117 1118 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1119 { 1120 struct dentry *dentry = filp->f_dentry; 1121 struct inode *inode = dentry->d_inode; 1122 struct task_struct *p = get_proc_task(inode); 1123 unsigned int fd, tid, ino; 1124 int retval; 1125 char buf[PROC_NUMBUF]; 1126 struct files_struct * files; 1127 struct fdtable *fdt; 1128 1129 retval = -ENOENT; 1130 if (!p) 1131 goto out_no_task; 1132 retval = 0; 1133 tid = p->pid; 1134 1135 fd = filp->f_pos; 1136 switch (fd) { 1137 case 0: 1138 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 1139 goto out; 1140 filp->f_pos++; 1141 case 1: 1142 ino = parent_ino(dentry); 1143 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1144 goto out; 1145 filp->f_pos++; 1146 default: 1147 files = get_files_struct(p); 1148 if (!files) 1149 goto out; 1150 rcu_read_lock(); 1151 fdt = files_fdtable(files); 1152 for (fd = filp->f_pos-2; 1153 fd < fdt->max_fds; 1154 fd++, filp->f_pos++) { 1155 unsigned int i,j; 1156 1157 if (!fcheck_files(files, fd)) 1158 continue; 1159 rcu_read_unlock(); 1160 1161 j = PROC_NUMBUF; 1162 i = fd; 1163 do { 1164 j--; 1165 buf[j] = '0' + (i % 10); 1166 i /= 10; 1167 } while (i); 1168 1169 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1170 if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1171 rcu_read_lock(); 1172 break; 1173 } 1174 rcu_read_lock(); 1175 } 1176 rcu_read_unlock(); 1177 
put_files_struct(files); 1178 } 1179 out: 1180 put_task_struct(p); 1181 out_no_task: 1182 return retval; 1183 } 1184 1185 static int proc_pident_readdir(struct file *filp, 1186 void *dirent, filldir_t filldir, 1187 struct pid_entry *ents, unsigned int nents) 1188 { 1189 int i; 1190 int pid; 1191 struct dentry *dentry = filp->f_dentry; 1192 struct inode *inode = dentry->d_inode; 1193 struct task_struct *task = get_proc_task(inode); 1194 struct pid_entry *p; 1195 ino_t ino; 1196 int ret; 1197 1198 ret = -ENOENT; 1199 if (!task) 1200 goto out; 1201 1202 ret = 0; 1203 pid = task->pid; 1204 put_task_struct(task); 1205 i = filp->f_pos; 1206 switch (i) { 1207 case 0: 1208 ino = inode->i_ino; 1209 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 1210 goto out; 1211 i++; 1212 filp->f_pos++; 1213 /* fall through */ 1214 case 1: 1215 ino = parent_ino(dentry); 1216 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 1217 goto out; 1218 i++; 1219 filp->f_pos++; 1220 /* fall through */ 1221 default: 1222 i -= 2; 1223 if (i >= nents) { 1224 ret = 1; 1225 goto out; 1226 } 1227 p = ents + i; 1228 while (p->name) { 1229 if (filldir(dirent, p->name, p->len, filp->f_pos, 1230 fake_ino(pid, p->type), p->mode >> 12) < 0) 1231 goto out; 1232 filp->f_pos++; 1233 p++; 1234 } 1235 } 1236 1237 ret = 1; 1238 out: 1239 return ret; 1240 } 1241 1242 static int proc_tgid_base_readdir(struct file * filp, 1243 void * dirent, filldir_t filldir) 1244 { 1245 return proc_pident_readdir(filp,dirent,filldir, 1246 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 1247 } 1248 1249 static int proc_tid_base_readdir(struct file * filp, 1250 void * dirent, filldir_t filldir) 1251 { 1252 return proc_pident_readdir(filp,dirent,filldir, 1253 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 1254 } 1255 1256 /* building an inode */ 1257 1258 static int task_dumpable(struct task_struct *task) 1259 { 1260 int dumpable = 0; 1261 struct mm_struct *mm; 1262 1263 task_lock(task); 1264 mm = task->mm; 1265 if (mm) 1266 dumpable = 
mm->dumpable; 1267 task_unlock(task); 1268 if(dumpable == 1) 1269 return 1; 1270 return 0; 1271 } 1272 1273 1274 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) 1275 { 1276 struct inode * inode; 1277 struct proc_inode *ei; 1278 1279 /* We need a new inode */ 1280 1281 inode = new_inode(sb); 1282 if (!inode) 1283 goto out; 1284 1285 /* Common stuff */ 1286 ei = PROC_I(inode); 1287 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1288 inode->i_ino = fake_ino(task->pid, ino); 1289 1290 /* 1291 * grab the reference to task. 1292 */ 1293 ei->pid = get_pid(task->pids[PIDTYPE_PID].pid); 1294 if (!ei->pid) 1295 goto out_unlock; 1296 1297 inode->i_uid = 0; 1298 inode->i_gid = 0; 1299 if (task_dumpable(task)) { 1300 inode->i_uid = task->euid; 1301 inode->i_gid = task->egid; 1302 } 1303 security_task_to_inode(task, inode); 1304 1305 out: 1306 return inode; 1307 1308 out_unlock: 1309 iput(inode); 1310 return NULL; 1311 } 1312 1313 /* dentry stuff */ 1314 1315 /* 1316 * Exceptional case: normally we are not allowed to unhash a busy 1317 * directory. In this case, however, we can do it - no aliasing problems 1318 * due to the way we treat inodes. 1319 * 1320 * Rewrite the inode's ownerships here because the owning task may have 1321 * performed a setuid(), etc. 1322 * 1323 * Before the /proc/pid/status file was created the only way to read 1324 * the effective uid of a /process was to stat /proc/pid. Reading 1325 * /proc/pid/status is slow enough that procps and other packages 1326 * kept stating /proc/pid. To keep the rules in /proc simple I have 1327 * made this apply to all per process world readable and executable 1328 * directories. 
1329 */ 1330 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1331 { 1332 struct inode *inode = dentry->d_inode; 1333 struct task_struct *task = get_proc_task(inode); 1334 if (task) { 1335 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1336 task_dumpable(task)) { 1337 inode->i_uid = task->euid; 1338 inode->i_gid = task->egid; 1339 } else { 1340 inode->i_uid = 0; 1341 inode->i_gid = 0; 1342 } 1343 security_task_to_inode(task, inode); 1344 put_task_struct(task); 1345 return 1; 1346 } 1347 d_drop(dentry); 1348 return 0; 1349 } 1350 1351 static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1352 { 1353 struct inode *inode = dentry->d_inode; 1354 struct task_struct *task; 1355 generic_fillattr(inode, stat); 1356 1357 rcu_read_lock(); 1358 stat->uid = 0; 1359 stat->gid = 0; 1360 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1361 if (task) { 1362 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1363 task_dumpable(task)) { 1364 stat->uid = task->euid; 1365 stat->gid = task->egid; 1366 } 1367 } 1368 rcu_read_unlock(); 1369 return 0; 1370 } 1371 1372 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1373 { 1374 struct inode *inode = dentry->d_inode; 1375 struct task_struct *task = get_proc_task(inode); 1376 int fd = proc_fd(inode); 1377 struct files_struct *files; 1378 1379 if (task) { 1380 files = get_files_struct(task); 1381 if (files) { 1382 rcu_read_lock(); 1383 if (fcheck_files(files, fd)) { 1384 rcu_read_unlock(); 1385 put_files_struct(files); 1386 if (task_dumpable(task)) { 1387 inode->i_uid = task->euid; 1388 inode->i_gid = task->egid; 1389 } else { 1390 inode->i_uid = 0; 1391 inode->i_gid = 0; 1392 } 1393 security_task_to_inode(task, inode); 1394 put_task_struct(task); 1395 return 1; 1396 } 1397 rcu_read_unlock(); 1398 put_files_struct(files); 1399 } 1400 put_task_struct(task); 1401 } 1402 d_drop(dentry); 1403 return 0; 1404 } 1405 1406 static int pid_delete_dentry(struct dentry 
* dentry) 1407 { 1408 /* Is the task we represent dead? 1409 * If so, then don't put the dentry on the lru list, 1410 * kill it immediately. 1411 */ 1412 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1413 } 1414 1415 static struct dentry_operations tid_fd_dentry_operations = 1416 { 1417 .d_revalidate = tid_fd_revalidate, 1418 .d_delete = pid_delete_dentry, 1419 }; 1420 1421 static struct dentry_operations pid_dentry_operations = 1422 { 1423 .d_revalidate = pid_revalidate, 1424 .d_delete = pid_delete_dentry, 1425 }; 1426 1427 /* Lookups */ 1428 1429 static unsigned name_to_int(struct dentry *dentry) 1430 { 1431 const char *name = dentry->d_name.name; 1432 int len = dentry->d_name.len; 1433 unsigned n = 0; 1434 1435 if (len > 1 && *name == '0') 1436 goto out; 1437 while (len-- > 0) { 1438 unsigned c = *name++ - '0'; 1439 if (c > 9) 1440 goto out; 1441 if (n >= (~0U-9)/10) 1442 goto out; 1443 n *= 10; 1444 n += c; 1445 } 1446 return n; 1447 out: 1448 return ~0U; 1449 } 1450 1451 /* SMP-safe */ 1452 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1453 { 1454 struct task_struct *task = get_proc_task(dir); 1455 unsigned fd = name_to_int(dentry); 1456 struct dentry *result = ERR_PTR(-ENOENT); 1457 struct file * file; 1458 struct files_struct * files; 1459 struct inode *inode; 1460 struct proc_inode *ei; 1461 1462 if (!task) 1463 goto out_no_task; 1464 if (fd == ~0U) 1465 goto out; 1466 1467 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1468 if (!inode) 1469 goto out; 1470 ei = PROC_I(inode); 1471 ei->fd = fd; 1472 files = get_files_struct(task); 1473 if (!files) 1474 goto out_unlock; 1475 inode->i_mode = S_IFLNK; 1476 1477 /* 1478 * We are not taking a ref to the file structure, so we must 1479 * hold ->file_lock. 
1480 */ 1481 spin_lock(&files->file_lock); 1482 file = fcheck_files(files, fd); 1483 if (!file) 1484 goto out_unlock2; 1485 if (file->f_mode & 1) 1486 inode->i_mode |= S_IRUSR | S_IXUSR; 1487 if (file->f_mode & 2) 1488 inode->i_mode |= S_IWUSR | S_IXUSR; 1489 spin_unlock(&files->file_lock); 1490 put_files_struct(files); 1491 inode->i_op = &proc_pid_link_inode_operations; 1492 inode->i_size = 64; 1493 ei->op.proc_get_link = proc_fd_link; 1494 dentry->d_op = &tid_fd_dentry_operations; 1495 d_add(dentry, inode); 1496 /* Close the race of the process dying before we return the dentry */ 1497 if (tid_fd_revalidate(dentry, NULL)) 1498 result = NULL; 1499 out: 1500 put_task_struct(task); 1501 out_no_task: 1502 return result; 1503 1504 out_unlock2: 1505 spin_unlock(&files->file_lock); 1506 put_files_struct(files); 1507 out_unlock: 1508 iput(inode); 1509 goto out; 1510 } 1511 1512 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1513 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1514 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 1515 1516 static struct file_operations proc_fd_operations = { 1517 .read = generic_read_dir, 1518 .readdir = proc_readfd, 1519 }; 1520 1521 static struct file_operations proc_task_operations = { 1522 .read = generic_read_dir, 1523 .readdir = proc_task_readdir, 1524 }; 1525 1526 /* 1527 * proc directories can do almost nothing.. 
1528 */ 1529 static struct inode_operations proc_fd_inode_operations = { 1530 .lookup = proc_lookupfd, 1531 }; 1532 1533 static struct inode_operations proc_task_inode_operations = { 1534 .lookup = proc_task_lookup, 1535 .getattr = proc_task_getattr, 1536 }; 1537 1538 #ifdef CONFIG_SECURITY 1539 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 1540 size_t count, loff_t *ppos) 1541 { 1542 struct inode * inode = file->f_dentry->d_inode; 1543 unsigned long page; 1544 ssize_t length; 1545 struct task_struct *task = get_proc_task(inode); 1546 1547 length = -ESRCH; 1548 if (!task) 1549 goto out_no_task; 1550 1551 if (count > PAGE_SIZE) 1552 count = PAGE_SIZE; 1553 length = -ENOMEM; 1554 if (!(page = __get_free_page(GFP_KERNEL))) 1555 goto out; 1556 1557 length = security_getprocattr(task, 1558 (char*)file->f_dentry->d_name.name, 1559 (void*)page, count); 1560 if (length >= 0) 1561 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1562 free_page(page); 1563 out: 1564 put_task_struct(task); 1565 out_no_task: 1566 return length; 1567 } 1568 1569 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 1570 size_t count, loff_t *ppos) 1571 { 1572 struct inode * inode = file->f_dentry->d_inode; 1573 char *page; 1574 ssize_t length; 1575 struct task_struct *task = get_proc_task(inode); 1576 1577 length = -ESRCH; 1578 if (!task) 1579 goto out_no_task; 1580 if (count > PAGE_SIZE) 1581 count = PAGE_SIZE; 1582 1583 /* No partial writes. 
*/ 1584 length = -EINVAL; 1585 if (*ppos != 0) 1586 goto out; 1587 1588 length = -ENOMEM; 1589 page = (char*)__get_free_page(GFP_USER); 1590 if (!page) 1591 goto out; 1592 1593 length = -EFAULT; 1594 if (copy_from_user(page, buf, count)) 1595 goto out_free; 1596 1597 length = security_setprocattr(task, 1598 (char*)file->f_dentry->d_name.name, 1599 (void*)page, count); 1600 out_free: 1601 free_page((unsigned long) page); 1602 out: 1603 put_task_struct(task); 1604 out_no_task: 1605 return length; 1606 } 1607 1608 static struct file_operations proc_pid_attr_operations = { 1609 .read = proc_pid_attr_read, 1610 .write = proc_pid_attr_write, 1611 }; 1612 1613 static struct file_operations proc_tid_attr_operations; 1614 static struct inode_operations proc_tid_attr_inode_operations; 1615 static struct file_operations proc_tgid_attr_operations; 1616 static struct inode_operations proc_tgid_attr_inode_operations; 1617 #endif 1618 1619 /* SMP-safe */ 1620 static struct dentry *proc_pident_lookup(struct inode *dir, 1621 struct dentry *dentry, 1622 struct pid_entry *ents) 1623 { 1624 struct inode *inode; 1625 struct dentry *error; 1626 struct task_struct *task = get_proc_task(dir); 1627 struct pid_entry *p; 1628 struct proc_inode *ei; 1629 1630 error = ERR_PTR(-ENOENT); 1631 inode = NULL; 1632 1633 if (!task) 1634 goto out_no_task; 1635 1636 for (p = ents; p->name; p++) { 1637 if (p->len != dentry->d_name.len) 1638 continue; 1639 if (!memcmp(dentry->d_name.name, p->name, p->len)) 1640 break; 1641 } 1642 if (!p->name) 1643 goto out; 1644 1645 error = ERR_PTR(-EINVAL); 1646 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1647 if (!inode) 1648 goto out; 1649 1650 ei = PROC_I(inode); 1651 inode->i_mode = p->mode; 1652 /* 1653 * Yes, it does not scale. And it should not. Don't add 1654 * new entries into /proc/<tgid>/ without very good reasons. 
1655 */ 1656 switch(p->type) { 1657 case PROC_TGID_TASK: 1658 inode->i_nlink = 2; 1659 inode->i_op = &proc_task_inode_operations; 1660 inode->i_fop = &proc_task_operations; 1661 break; 1662 case PROC_TID_FD: 1663 case PROC_TGID_FD: 1664 inode->i_nlink = 2; 1665 inode->i_op = &proc_fd_inode_operations; 1666 inode->i_fop = &proc_fd_operations; 1667 break; 1668 case PROC_TID_EXE: 1669 case PROC_TGID_EXE: 1670 inode->i_op = &proc_pid_link_inode_operations; 1671 ei->op.proc_get_link = proc_exe_link; 1672 break; 1673 case PROC_TID_CWD: 1674 case PROC_TGID_CWD: 1675 inode->i_op = &proc_pid_link_inode_operations; 1676 ei->op.proc_get_link = proc_cwd_link; 1677 break; 1678 case PROC_TID_ROOT: 1679 case PROC_TGID_ROOT: 1680 inode->i_op = &proc_pid_link_inode_operations; 1681 ei->op.proc_get_link = proc_root_link; 1682 break; 1683 case PROC_TID_ENVIRON: 1684 case PROC_TGID_ENVIRON: 1685 inode->i_fop = &proc_info_file_operations; 1686 ei->op.proc_read = proc_pid_environ; 1687 break; 1688 case PROC_TID_AUXV: 1689 case PROC_TGID_AUXV: 1690 inode->i_fop = &proc_info_file_operations; 1691 ei->op.proc_read = proc_pid_auxv; 1692 break; 1693 case PROC_TID_STATUS: 1694 case PROC_TGID_STATUS: 1695 inode->i_fop = &proc_info_file_operations; 1696 ei->op.proc_read = proc_pid_status; 1697 break; 1698 case PROC_TID_STAT: 1699 inode->i_fop = &proc_info_file_operations; 1700 ei->op.proc_read = proc_tid_stat; 1701 break; 1702 case PROC_TGID_STAT: 1703 inode->i_fop = &proc_info_file_operations; 1704 ei->op.proc_read = proc_tgid_stat; 1705 break; 1706 case PROC_TID_CMDLINE: 1707 case PROC_TGID_CMDLINE: 1708 inode->i_fop = &proc_info_file_operations; 1709 ei->op.proc_read = proc_pid_cmdline; 1710 break; 1711 case PROC_TID_STATM: 1712 case PROC_TGID_STATM: 1713 inode->i_fop = &proc_info_file_operations; 1714 ei->op.proc_read = proc_pid_statm; 1715 break; 1716 case PROC_TID_MAPS: 1717 case PROC_TGID_MAPS: 1718 inode->i_fop = &proc_maps_operations; 1719 break; 1720 #ifdef CONFIG_NUMA 1721 case 
PROC_TID_NUMA_MAPS: 1722 case PROC_TGID_NUMA_MAPS: 1723 inode->i_fop = &proc_numa_maps_operations; 1724 break; 1725 #endif 1726 case PROC_TID_MEM: 1727 case PROC_TGID_MEM: 1728 inode->i_fop = &proc_mem_operations; 1729 break; 1730 #ifdef CONFIG_SECCOMP 1731 case PROC_TID_SECCOMP: 1732 case PROC_TGID_SECCOMP: 1733 inode->i_fop = &proc_seccomp_operations; 1734 break; 1735 #endif /* CONFIG_SECCOMP */ 1736 case PROC_TID_MOUNTS: 1737 case PROC_TGID_MOUNTS: 1738 inode->i_fop = &proc_mounts_operations; 1739 break; 1740 #ifdef CONFIG_MMU 1741 case PROC_TID_SMAPS: 1742 case PROC_TGID_SMAPS: 1743 inode->i_fop = &proc_smaps_operations; 1744 break; 1745 #endif 1746 case PROC_TID_MOUNTSTATS: 1747 case PROC_TGID_MOUNTSTATS: 1748 inode->i_fop = &proc_mountstats_operations; 1749 break; 1750 #ifdef CONFIG_SECURITY 1751 case PROC_TID_ATTR: 1752 inode->i_nlink = 2; 1753 inode->i_op = &proc_tid_attr_inode_operations; 1754 inode->i_fop = &proc_tid_attr_operations; 1755 break; 1756 case PROC_TGID_ATTR: 1757 inode->i_nlink = 2; 1758 inode->i_op = &proc_tgid_attr_inode_operations; 1759 inode->i_fop = &proc_tgid_attr_operations; 1760 break; 1761 case PROC_TID_ATTR_CURRENT: 1762 case PROC_TGID_ATTR_CURRENT: 1763 case PROC_TID_ATTR_PREV: 1764 case PROC_TGID_ATTR_PREV: 1765 case PROC_TID_ATTR_EXEC: 1766 case PROC_TGID_ATTR_EXEC: 1767 case PROC_TID_ATTR_FSCREATE: 1768 case PROC_TGID_ATTR_FSCREATE: 1769 case PROC_TID_ATTR_KEYCREATE: 1770 case PROC_TGID_ATTR_KEYCREATE: 1771 case PROC_TID_ATTR_SOCKCREATE: 1772 case PROC_TGID_ATTR_SOCKCREATE: 1773 inode->i_fop = &proc_pid_attr_operations; 1774 break; 1775 #endif 1776 #ifdef CONFIG_KALLSYMS 1777 case PROC_TID_WCHAN: 1778 case PROC_TGID_WCHAN: 1779 inode->i_fop = &proc_info_file_operations; 1780 ei->op.proc_read = proc_pid_wchan; 1781 break; 1782 #endif 1783 #ifdef CONFIG_SCHEDSTATS 1784 case PROC_TID_SCHEDSTAT: 1785 case PROC_TGID_SCHEDSTAT: 1786 inode->i_fop = &proc_info_file_operations; 1787 ei->op.proc_read = proc_pid_schedstat; 1788 break; 1789 
#endif 1790 #ifdef CONFIG_CPUSETS 1791 case PROC_TID_CPUSET: 1792 case PROC_TGID_CPUSET: 1793 inode->i_fop = &proc_cpuset_operations; 1794 break; 1795 #endif 1796 case PROC_TID_OOM_SCORE: 1797 case PROC_TGID_OOM_SCORE: 1798 inode->i_fop = &proc_info_file_operations; 1799 ei->op.proc_read = proc_oom_score; 1800 break; 1801 case PROC_TID_OOM_ADJUST: 1802 case PROC_TGID_OOM_ADJUST: 1803 inode->i_fop = &proc_oom_adjust_operations; 1804 break; 1805 #ifdef CONFIG_AUDITSYSCALL 1806 case PROC_TID_LOGINUID: 1807 case PROC_TGID_LOGINUID: 1808 inode->i_fop = &proc_loginuid_operations; 1809 break; 1810 #endif 1811 default: 1812 printk("procfs: impossible type (%d)",p->type); 1813 iput(inode); 1814 error = ERR_PTR(-EINVAL); 1815 goto out; 1816 } 1817 dentry->d_op = &pid_dentry_operations; 1818 d_add(dentry, inode); 1819 /* Close the race of the process dying before we return the dentry */ 1820 if (pid_revalidate(dentry, NULL)) 1821 error = NULL; 1822 out: 1823 put_task_struct(task); 1824 out_no_task: 1825 return error; 1826 } 1827 1828 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1829 return proc_pident_lookup(dir, dentry, tgid_base_stuff); 1830 } 1831 1832 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1833 return proc_pident_lookup(dir, dentry, tid_base_stuff); 1834 } 1835 1836 static struct file_operations proc_tgid_base_operations = { 1837 .read = generic_read_dir, 1838 .readdir = proc_tgid_base_readdir, 1839 }; 1840 1841 static struct file_operations proc_tid_base_operations = { 1842 .read = generic_read_dir, 1843 .readdir = proc_tid_base_readdir, 1844 }; 1845 1846 static struct inode_operations proc_tgid_base_inode_operations = { 1847 .lookup = proc_tgid_base_lookup, 1848 .getattr = pid_getattr, 1849 }; 1850 1851 static struct inode_operations proc_tid_base_inode_operations = { 1852 .lookup = proc_tid_base_lookup, 1853 .getattr = pid_getattr, 1854 
}; 1855 1856 #ifdef CONFIG_SECURITY 1857 static int proc_tgid_attr_readdir(struct file * filp, 1858 void * dirent, filldir_t filldir) 1859 { 1860 return proc_pident_readdir(filp,dirent,filldir, 1861 tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); 1862 } 1863 1864 static int proc_tid_attr_readdir(struct file * filp, 1865 void * dirent, filldir_t filldir) 1866 { 1867 return proc_pident_readdir(filp,dirent,filldir, 1868 tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); 1869 } 1870 1871 static struct file_operations proc_tgid_attr_operations = { 1872 .read = generic_read_dir, 1873 .readdir = proc_tgid_attr_readdir, 1874 }; 1875 1876 static struct file_operations proc_tid_attr_operations = { 1877 .read = generic_read_dir, 1878 .readdir = proc_tid_attr_readdir, 1879 }; 1880 1881 static struct dentry *proc_tgid_attr_lookup(struct inode *dir, 1882 struct dentry *dentry, struct nameidata *nd) 1883 { 1884 return proc_pident_lookup(dir, dentry, tgid_attr_stuff); 1885 } 1886 1887 static struct dentry *proc_tid_attr_lookup(struct inode *dir, 1888 struct dentry *dentry, struct nameidata *nd) 1889 { 1890 return proc_pident_lookup(dir, dentry, tid_attr_stuff); 1891 } 1892 1893 static struct inode_operations proc_tgid_attr_inode_operations = { 1894 .lookup = proc_tgid_attr_lookup, 1895 .getattr = pid_getattr, 1896 }; 1897 1898 static struct inode_operations proc_tid_attr_inode_operations = { 1899 .lookup = proc_tid_attr_lookup, 1900 .getattr = pid_getattr, 1901 }; 1902 #endif 1903 1904 /* 1905 * /proc/self: 1906 */ 1907 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1908 int buflen) 1909 { 1910 char tmp[PROC_NUMBUF]; 1911 sprintf(tmp, "%d", current->tgid); 1912 return vfs_readlink(dentry,buffer,buflen,tmp); 1913 } 1914 1915 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1916 { 1917 char tmp[PROC_NUMBUF]; 1918 sprintf(tmp, "%d", current->tgid); 1919 return ERR_PTR(vfs_follow_link(nd,tmp)); 1920 } 1921 1922 static struct 
inode_operations proc_self_inode_operations = { 1923 .readlink = proc_self_readlink, 1924 .follow_link = proc_self_follow_link, 1925 }; 1926 1927 /** 1928 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 1929 * 1930 * @task: task that should be flushed. 1931 * 1932 * Looks in the dcache for 1933 * /proc/@pid 1934 * /proc/@tgid/task/@pid 1935 * if either directory is present flushes it and all of it'ts children 1936 * from the dcache. 1937 * 1938 * It is safe and reasonable to cache /proc entries for a task until 1939 * that task exits. After that they just clog up the dcache with 1940 * useless entries, possibly causing useful dcache entries to be 1941 * flushed instead. This routine is proved to flush those useless 1942 * dcache entries at process exit time. 1943 * 1944 * NOTE: This routine is just an optimization so it does not guarantee 1945 * that no dcache entries will exist at process exit time it 1946 * just makes it very unlikely that any will persist. 1947 */ 1948 void proc_flush_task(struct task_struct *task) 1949 { 1950 struct dentry *dentry, *leader, *dir; 1951 char buf[PROC_NUMBUF]; 1952 struct qstr name; 1953 1954 name.name = buf; 1955 name.len = snprintf(buf, sizeof(buf), "%d", task->pid); 1956 dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); 1957 if (dentry) { 1958 shrink_dcache_parent(dentry); 1959 d_drop(dentry); 1960 dput(dentry); 1961 } 1962 1963 if (thread_group_leader(task)) 1964 goto out; 1965 1966 name.name = buf; 1967 name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); 1968 leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); 1969 if (!leader) 1970 goto out; 1971 1972 name.name = "task"; 1973 name.len = strlen(name.name); 1974 dir = d_hash_and_lookup(leader, &name); 1975 if (!dir) 1976 goto out_put_leader; 1977 1978 name.name = buf; 1979 name.len = snprintf(buf, sizeof(buf), "%d", task->pid); 1980 dentry = d_hash_and_lookup(dir, &name); 1981 if (dentry) { 1982 shrink_dcache_parent(dentry); 1983 
d_drop(dentry); 1984 dput(dentry); 1985 } 1986 1987 dput(dir); 1988 out_put_leader: 1989 dput(leader); 1990 out: 1991 return; 1992 } 1993 1994 /* SMP-safe */ 1995 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1996 { 1997 struct dentry *result = ERR_PTR(-ENOENT); 1998 struct task_struct *task; 1999 struct inode *inode; 2000 struct proc_inode *ei; 2001 unsigned tgid; 2002 2003 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 2004 inode = new_inode(dir->i_sb); 2005 if (!inode) 2006 return ERR_PTR(-ENOMEM); 2007 ei = PROC_I(inode); 2008 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2009 inode->i_ino = fake_ino(0, PROC_TGID_INO); 2010 ei->pde = NULL; 2011 inode->i_mode = S_IFLNK|S_IRWXUGO; 2012 inode->i_uid = inode->i_gid = 0; 2013 inode->i_size = 64; 2014 inode->i_op = &proc_self_inode_operations; 2015 d_add(dentry, inode); 2016 return NULL; 2017 } 2018 tgid = name_to_int(dentry); 2019 if (tgid == ~0U) 2020 goto out; 2021 2022 rcu_read_lock(); 2023 task = find_task_by_pid(tgid); 2024 if (task) 2025 get_task_struct(task); 2026 rcu_read_unlock(); 2027 if (!task) 2028 goto out; 2029 2030 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); 2031 if (!inode) 2032 goto out_put_task; 2033 2034 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2035 inode->i_op = &proc_tgid_base_inode_operations; 2036 inode->i_fop = &proc_tgid_base_operations; 2037 inode->i_flags|=S_IMMUTABLE; 2038 #ifdef CONFIG_SECURITY 2039 inode->i_nlink = 5; 2040 #else 2041 inode->i_nlink = 4; 2042 #endif 2043 2044 dentry->d_op = &pid_dentry_operations; 2045 2046 d_add(dentry, inode); 2047 /* Close the race of the process dying before we return the dentry */ 2048 if (pid_revalidate(dentry, NULL)) 2049 result = NULL; 2050 2051 out_put_task: 2052 put_task_struct(task); 2053 out: 2054 return result; 2055 } 2056 2057 /* SMP-safe */ 2058 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, 
struct nameidata *nd) 2059 { 2060 struct dentry *result = ERR_PTR(-ENOENT); 2061 struct task_struct *task; 2062 struct task_struct *leader = get_proc_task(dir); 2063 struct inode *inode; 2064 unsigned tid; 2065 2066 if (!leader) 2067 goto out_no_task; 2068 2069 tid = name_to_int(dentry); 2070 if (tid == ~0U) 2071 goto out; 2072 2073 rcu_read_lock(); 2074 task = find_task_by_pid(tid); 2075 if (task) 2076 get_task_struct(task); 2077 rcu_read_unlock(); 2078 if (!task) 2079 goto out; 2080 if (leader->tgid != task->tgid) 2081 goto out_drop_task; 2082 2083 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); 2084 2085 2086 if (!inode) 2087 goto out_drop_task; 2088 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2089 inode->i_op = &proc_tid_base_inode_operations; 2090 inode->i_fop = &proc_tid_base_operations; 2091 inode->i_flags|=S_IMMUTABLE; 2092 #ifdef CONFIG_SECURITY 2093 inode->i_nlink = 4; 2094 #else 2095 inode->i_nlink = 3; 2096 #endif 2097 2098 dentry->d_op = &pid_dentry_operations; 2099 2100 d_add(dentry, inode); 2101 /* Close the race of the process dying before we return the dentry */ 2102 if (pid_revalidate(dentry, NULL)) 2103 result = NULL; 2104 2105 out_drop_task: 2106 put_task_struct(task); 2107 out: 2108 put_task_struct(leader); 2109 out_no_task: 2110 return result; 2111 } 2112 2113 /* 2114 * Find the first tgid to return to user space. 2115 * 2116 * Usually this is just whatever follows &init_task, but if the users 2117 * buffer was too small to hold the full list or there was a seek into 2118 * the middle of the directory we have more work to do. 2119 * 2120 * In the case of a short read we start with find_task_by_pid. 2121 * 2122 * In the case of a seek we start with &init_task and walk nr 2123 * threads past it. 
2124 */ 2125 static struct task_struct *first_tgid(int tgid, unsigned int nr) 2126 { 2127 struct task_struct *pos; 2128 rcu_read_lock(); 2129 if (tgid && nr) { 2130 pos = find_task_by_pid(tgid); 2131 if (pos && thread_group_leader(pos)) 2132 goto found; 2133 } 2134 /* If nr exceeds the number of processes get out quickly */ 2135 pos = NULL; 2136 if (nr && nr >= nr_processes()) 2137 goto done; 2138 2139 /* If we haven't found our starting place yet start with 2140 * the init_task and walk nr tasks forward. 2141 */ 2142 for (pos = next_task(&init_task); nr > 0; --nr) { 2143 pos = next_task(pos); 2144 if (pos == &init_task) { 2145 pos = NULL; 2146 goto done; 2147 } 2148 } 2149 found: 2150 get_task_struct(pos); 2151 done: 2152 rcu_read_unlock(); 2153 return pos; 2154 } 2155 2156 /* 2157 * Find the next task in the task list. 2158 * Return NULL if we loop or there is any error. 2159 * 2160 * The reference to the input task_struct is released. 2161 */ 2162 static struct task_struct *next_tgid(struct task_struct *start) 2163 { 2164 struct task_struct *pos; 2165 rcu_read_lock(); 2166 pos = start; 2167 if (pid_alive(start)) 2168 pos = next_task(start); 2169 if (pid_alive(pos) && (pos != &init_task)) { 2170 get_task_struct(pos); 2171 goto done; 2172 } 2173 pos = NULL; 2174 done: 2175 rcu_read_unlock(); 2176 put_task_struct(start); 2177 return pos; 2178 } 2179 2180 /* for the /proc/ directory itself, after non-process stuff has been done */ 2181 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2182 { 2183 char buf[PROC_NUMBUF]; 2184 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2185 struct task_struct *task; 2186 int tgid; 2187 2188 if (!nr) { 2189 ino_t ino = fake_ino(0,PROC_TGID_INO); 2190 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) 2191 return 0; 2192 filp->f_pos++; 2193 nr++; 2194 } 2195 nr -= 1; 2196 2197 /* f_version caches the tgid value that the last readdir call couldn't 2198 * return. 
lseek aka telldir automagically resets f_version to 0. 2199 */ 2200 tgid = filp->f_version; 2201 filp->f_version = 0; 2202 for (task = first_tgid(tgid, nr); 2203 task; 2204 task = next_tgid(task), filp->f_pos++) { 2205 int len; 2206 ino_t ino; 2207 tgid = task->pid; 2208 len = snprintf(buf, sizeof(buf), "%d", tgid); 2209 ino = fake_ino(tgid, PROC_TGID_INO); 2210 if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) { 2211 /* returning this tgid failed, save it as the first 2212 * pid for the next readir call */ 2213 filp->f_version = tgid; 2214 put_task_struct(task); 2215 break; 2216 } 2217 } 2218 return 0; 2219 } 2220 2221 /* 2222 * Find the first tid of a thread group to return to user space. 2223 * 2224 * Usually this is just the thread group leader, but if the users 2225 * buffer was too small or there was a seek into the middle of the 2226 * directory we have more work todo. 2227 * 2228 * In the case of a short read we start with find_task_by_pid. 2229 * 2230 * In the case of a seek we start with the leader and walk nr 2231 * threads past it. 2232 */ 2233 static struct task_struct *first_tid(struct task_struct *leader, 2234 int tid, int nr) 2235 { 2236 struct task_struct *pos; 2237 2238 rcu_read_lock(); 2239 /* Attempt to start with the pid of a thread */ 2240 if (tid && (nr > 0)) { 2241 pos = find_task_by_pid(tid); 2242 if (pos && (pos->group_leader == leader)) 2243 goto found; 2244 } 2245 2246 /* If nr exceeds the number of threads there is nothing todo */ 2247 pos = NULL; 2248 if (nr && nr >= get_nr_threads(leader)) 2249 goto out; 2250 2251 /* If we haven't found our starting place yet start 2252 * with the leader and walk nr threads forward. 2253 */ 2254 for (pos = leader; nr > 0; --nr) { 2255 pos = next_thread(pos); 2256 if (pos == leader) { 2257 pos = NULL; 2258 goto out; 2259 } 2260 } 2261 found: 2262 get_task_struct(pos); 2263 out: 2264 rcu_read_unlock(); 2265 return pos; 2266 } 2267 2268 /* 2269 * Find the next thread in the thread list. 
2270 * Return NULL if there is an error or no next thread. 2271 * 2272 * The reference to the input task_struct is released. 2273 */ 2274 static struct task_struct *next_tid(struct task_struct *start) 2275 { 2276 struct task_struct *pos = NULL; 2277 rcu_read_lock(); 2278 if (pid_alive(start)) { 2279 pos = next_thread(start); 2280 if (thread_group_leader(pos)) 2281 pos = NULL; 2282 else 2283 get_task_struct(pos); 2284 } 2285 rcu_read_unlock(); 2286 put_task_struct(start); 2287 return pos; 2288 } 2289 2290 /* for the /proc/TGID/task/ directories */ 2291 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2292 { 2293 char buf[PROC_NUMBUF]; 2294 struct dentry *dentry = filp->f_dentry; 2295 struct inode *inode = dentry->d_inode; 2296 struct task_struct *leader = get_proc_task(inode); 2297 struct task_struct *task; 2298 int retval = -ENOENT; 2299 ino_t ino; 2300 int tid; 2301 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2302 2303 if (!leader) 2304 goto out_no_task; 2305 retval = 0; 2306 2307 switch (pos) { 2308 case 0: 2309 ino = inode->i_ino; 2310 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) 2311 goto out; 2312 pos++; 2313 /* fall through */ 2314 case 1: 2315 ino = parent_ino(dentry); 2316 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) 2317 goto out; 2318 pos++; 2319 /* fall through */ 2320 } 2321 2322 /* f_version caches the tgid value that the last readdir call couldn't 2323 * return. lseek aka telldir automagically resets f_version to 0. 
2324 */ 2325 tid = filp->f_version; 2326 filp->f_version = 0; 2327 for (task = first_tid(leader, tid, pos - 2); 2328 task; 2329 task = next_tid(task), pos++) { 2330 int len; 2331 tid = task->pid; 2332 len = snprintf(buf, sizeof(buf), "%d", tid); 2333 ino = fake_ino(tid, PROC_TID_INO); 2334 if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) { 2335 /* returning this tgid failed, save it as the first 2336 * pid for the next readir call */ 2337 filp->f_version = tid; 2338 put_task_struct(task); 2339 break; 2340 } 2341 } 2342 out: 2343 filp->f_pos = pos; 2344 put_task_struct(leader); 2345 out_no_task: 2346 return retval; 2347 } 2348 2349 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 2350 { 2351 struct inode *inode = dentry->d_inode; 2352 struct task_struct *p = get_proc_task(inode); 2353 generic_fillattr(inode, stat); 2354 2355 if (p) { 2356 rcu_read_lock(); 2357 stat->nlink += get_nr_threads(p); 2358 rcu_read_unlock(); 2359 put_task_struct(p); 2360 } 2361 2362 return 0; 2363 } 2364